You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/22 22:58:09 UTC
svn commit: r1485419 [3/4] - in /hive/branches/vectorization:
metastore/src/java/org/apache/hadoop/hive/metastore/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql/exec...
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -53,6 +54,15 @@ public class VectorUDAFSumLong extends V
static private final class Aggregation implements AggregationBuffer {
long sum;
boolean isNull;
+
+ public void sumValue(long value) {
+ if (isNull) {
+ sum = value;
+ isNull = false;
+ } else {
+ sum += value;
+ }
+ }
}
VectorExpression inputExpression;
@@ -63,17 +73,207 @@ public class VectorUDAFSumLong extends V
this.inputExpression = inputExpression;
result = new LongWritable();
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ LongColumnVector inputVector = (LongColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ long[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- LongColumnVector inputVector = (LongColumnVector)unit.
+ LongColumnVector inputVector = (LongColumnVector)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -94,17 +294,17 @@ public class VectorUDAFSumLong extends V
return;
}
- if (!unit.selectedInUse && inputVector.noNulls) {
+ if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
.VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -50,13 +51,13 @@ public class VectorUDAFVarPopDouble exte
/**
/* class for storing the current aggregate value.
*/
- static private final class Aggregation implements AggregationBuffer {
+ private static final class Aggregation implements AggregationBuffer {
double sum;
long count;
double variance;
boolean isNull;
- public void init () {
+ public void init() {
isNull = false;
sum = 0;
count = 0;
@@ -86,7 +87,7 @@ public class VectorUDAFVarPopDouble exte
initPartialResultInspector();
}
- private void initPartialResultInspector () {
+ private void initPartialResultInspector() {
ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
@@ -99,17 +100,200 @@ public class VectorUDAFVarPopDouble exte
soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ double[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize,
+ inputVector.isNull, batch.selected);
+ }
+
+ }
+ private void iterateRepeatingNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double value,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[selected[i]];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ double value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ DoubleColumnVector inputVector = (DoubleColumnVector)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -124,17 +308,17 @@ public class VectorUDAFVarPopDouble exte
iterateRepeatingNoNulls(myagg, vector[0], batchSize);
}
}
- else if (!unit.selectedInUse && inputVector.noNulls) {
+ else if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
.VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -50,13 +51,13 @@ public class VectorUDAFVarPopLong extend
/**
/* class for storing the current aggregate value.
*/
- static private final class Aggregation implements AggregationBuffer {
+ private static final class Aggregation implements AggregationBuffer {
double sum;
long count;
double variance;
boolean isNull;
- public void init () {
+ public void init() {
isNull = false;
sum = 0;
count = 0;
@@ -86,7 +87,7 @@ public class VectorUDAFVarPopLong extend
initPartialResultInspector();
}
- private void initPartialResultInspector () {
+ private void initPartialResultInspector() {
ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
@@ -99,17 +100,200 @@ public class VectorUDAFVarPopLong extend
soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ LongColumnVector inputVector = (LongColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ long[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize,
+ inputVector.isNull, batch.selected);
+ }
+
+ }
+ private void iterateRepeatingNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long value,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ int i = selected[j];
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ long value = vector[selected[i]];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ long value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- LongColumnVector inputVector = (LongColumnVector)unit.
+ LongColumnVector inputVector = (LongColumnVector)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -124,17 +308,17 @@ public class VectorUDAFVarPopLong extend
iterateRepeatingNoNulls(myagg, vector[0], batchSize);
}
}
- else if (!unit.selectedInUse && inputVector.noNulls) {
+ else if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampDouble.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampDouble.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampDouble.java Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
.VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -50,13 +51,13 @@ public class VectorUDAFVarSampDouble ext
/**
/* class for storing the current aggregate value.
*/
- static private final class Aggregation implements AggregationBuffer {
+ private static final class Aggregation implements AggregationBuffer {
double sum;
long count;
double variance;
boolean isNull;
- public void init () {
+ public void init() {
isNull = false;
sum = 0;
count = 0;
@@ -86,7 +87,7 @@ public class VectorUDAFVarSampDouble ext
initPartialResultInspector();
}
- private void initPartialResultInspector () {
+ private void initPartialResultInspector() {
ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
@@ -99,17 +100,200 @@ public class VectorUDAFVarSampDouble ext
soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ double[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize,
+ inputVector.isNull, batch.selected);
+ }
+
+ }
+ private void iterateRepeatingNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double value,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[selected[i]];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double[] vector,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ double value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ DoubleColumnVector inputVector = (DoubleColumnVector)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -124,17 +308,17 @@ public class VectorUDAFVarSampDouble ext
iterateRepeatingNoNulls(myagg, vector[0], batchSize);
}
}
- else if (!unit.selectedInUse && inputVector.noNulls) {
+ else if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampLong.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampLong.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarSampLong.java Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
.VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -50,13 +51,13 @@ public class VectorUDAFVarSampLong exten
/**
/* class for storing the current aggregate value.
*/
- static private final class Aggregation implements AggregationBuffer {
+ private static final class Aggregation implements AggregationBuffer {
double sum;
long count;
double variance;
boolean isNull;
- public void init () {
+ public void init() {
isNull = false;
sum = 0;
count = 0;
@@ -86,7 +87,7 @@ public class VectorUDAFVarSampLong exten
initPartialResultInspector();
}
- private void initPartialResultInspector () {
+ private void initPartialResultInspector() {
ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
@@ -99,17 +100,200 @@ public class VectorUDAFVarSampLong exten
soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ LongColumnVector inputVector = (LongColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ long[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize,
+ inputVector.isNull, batch.selected);
+ }
+
+ }
+ private void iterateRepeatingNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long value,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ int i = selected[j];
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ long value = vector[selected[i]];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ long[] vector,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ long value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- LongColumnVector inputVector = (LongColumnVector)unit.
+ LongColumnVector inputVector = (LongColumnVector)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -124,17 +308,17 @@ public class VectorUDAFVarSampLong exten
iterateRepeatingNoNulls(myagg, vector[0], batchSize);
}
}
- else if (!unit.selectedInUse && inputVector.noNulls) {
+ else if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Wed May 22 20:58:08 2013
@@ -269,6 +269,16 @@ public class CodeGen {
generateColumnArithmeticColumn(tdesc);
} else if (tdesc[0].equals("ColumnUnaryMinus")) {
generateColumnUnaryMinus(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFCount")) {
+ generateVectorUDAFCount(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFMinMax")) {
+ generateVectorUDAFMinMax(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFSum")) {
+ generateVectorUDAFSum(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFAvg")) {
+ generateVectorUDAFAvg(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFVar")) {
+ generateVectorUDAFVar(tdesc);
} else if (tdesc[0].equals("FilterStringColumnCompareScalar")) {
generateFilterStringColumnCompareScalar(tdesc);
} else if (tdesc[0].equals("FilterStringColumnCompareColumn")) {
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFAvg.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFAvg.txt?rev=1485419&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFAvg.txt (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFAvg.txt Wed May 22 20:58:08 2013
@@ -0,0 +1,440 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
+ VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+
+import org.apache.hadoop.hive.ql.io.orc.*;
+
+@Description(name = "avg", value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: <ValueType>)")
+public class <ClassName> extends VectorAggregateExpression {
+
+ /** class for storing the current aggregate value. */
+ static class Aggregation implements AggregationBuffer {
+ <ValueType> sum;
+ long count;
+ boolean isNull;
+
+ public void sumValue(<ValueType> value) {
+ if (isNull) {
+ sum = value;
+ count = 1;
+ isNull = false;
+ } else {
+ sum += value;
+ count++;
+ }
+ }
+ }
+
+ private VectorExpression inputExpression;
+ private Object[] partialResult;
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private StructObjectInspector soi;
+
+ public <ClassName>(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[2];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+
+ initPartialResultInspector();
+ }
+
+ private void initPartialResultInspector() {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ LongColumnVector inputVector = (LongColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ long[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ j);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ long[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += vector[0]*batchSize;
+ myagg.count += batchSize;
+ }
+ return;
+ }
+
+ if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ <ValueType> value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ <ValueType> value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ return partialResult;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFCount.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFCount.txt?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFCount.txt (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFCount.txt Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -52,6 +53,13 @@ public class <ClassName> extends VectorA
static class Aggregation implements AggregationBuffer {
long value;
boolean isNull;
+
+ public void initIfNull() {
+ if (isNull) {
+ isNull = false;
+ value = 0;
+ }
+ }
}
private VectorExpression inputExpression;
@@ -62,17 +70,112 @@ public class <ClassName> extends VectorA
this.inputExpression = inputExpression;
result = new LongWritable(0);
}
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ if (inputVector.noNulls) {
+ // if there are no nulls then the iteration is the same on all cases
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, batchSize);
+ } else if (!batch.selectedInUse) {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ batchSize, inputVector.isNull);
+ } else if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ batchSize, batch.selected, inputVector.isNull);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.initIfNull();
+ myagg.value++;
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.initIfNull();
+ myagg.value++;
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ myagg.initIfNull();
+ myagg.value++;
+ }
+ }
+ }
+
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- <InputColumnVectorType> inputVector = (<InputColumnVectorType>)unit.
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -80,11 +183,8 @@ public class <ClassName> extends VectorA
Aggregation myagg = (Aggregation)agg;
- if (myagg.isNull) {
- myagg.value = 0;
- myagg.isNull = false;
- }
-
+ myagg.initIfNull();
+
if (inputVector.isRepeating) {
if (inputVector.noNulls || !inputVector.isNull[0]) {
myagg.value += batchSize;
@@ -96,11 +196,11 @@ public class <ClassName> extends VectorA
myagg.value += batchSize;
return;
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, batchSize, inputVector.isNull);
}
else {
- iterateSelectionHasNulls(myagg, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, batchSize, inputVector.isNull, batch.selected);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFMinMax.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFMinMax.txt?rev=1485419&r1=1485418&r2=1485419&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFMinMax.txt (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/VectorUDAFMinMax.txt Wed May 22 20:58:08 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -53,6 +54,15 @@ public class <ClassName> extends VectorA
static private final class Aggregation implements AggregationBuffer {
<ValueType> value;
boolean isNull;
+
+ public void checkValue(<ValueType> value) {
+ if (isNull) {
+ isNull = false;
+ this.value = value;
+ } else if (value <OperatorSymbol> this.value) {
+ this.value = value;
+ }
+ }
}
private VectorExpression inputExpression;
@@ -64,15 +74,205 @@ public class <ClassName> extends VectorA
result = new <OutputType>();
}
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex);
+ return myagg;
+ }
+
+@Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ j);
+ myagg.checkValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[i]);
+ }
+ }
+ }
+
@Override
- public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit) throws HiveException {
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
- inputExpression.evaluate(unit);
+ inputExpression.evaluate(batch);
- <InputColumnVectorType> inputVector = (<InputColumnVectorType>)unit.
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
cols[this.inputExpression.getOutputColumn()];
- int batchSize = unit.size;
+ int batchSize = batch.size;
if (batchSize == 0) {
return;
@@ -91,17 +291,17 @@ public class <ClassName> extends VectorA
return;
}
- if (!unit.selectedInUse && inputVector.noNulls) {
+ if (!batch.selectedInUse && inputVector.noNulls) {
iterateNoSelectionNoNulls(myagg, vector, batchSize);
}
- else if (!unit.selectedInUse) {
+ else if (!batch.selectedInUse) {
iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
}
else if (inputVector.noNulls){
- iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
}
else {
- iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
}
}