You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/10 10:20:30 UTC
svn commit: r1480925 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/io/orc/
test/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Author: hashutosh
Date: Fri May 10 08:20:29 2013
New Revision: 1480925
URL: http://svn.apache.org/r1480925
Log:
HIVE-4514 : Handle constants in projection (Jitendra Nath Pandey via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcSerde.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri May 10 08:20:29 2013
@@ -30,6 +30,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterNotExpr;
@@ -241,7 +242,20 @@ public class VectorizationContext {
+  /**
+   * Creates the vector expression that projects a constant into a newly allocated output column.
+   * Constants typed long, int, short or byte are converted with {@code longValue()}, constants
+   * typed double or float with {@code doubleValue()}, and string constants are passed as their
+   * UTF-8 encoded bytes.
+   *
+   * @param exprDesc descriptor supplying the constant's type string and value
+   * @return a ConstantVectorExpression writing to the allocated output column
+   * @throws HiveException if the constant value is null or its type is none of the above
+   */
private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDesc)
throws HiveException {
- return null;
+    String type = exprDesc.getTypeString();
+    Object value = exprDesc.getValue();
+    if (value == null) {
+      // Fail with a descriptive, declared exception instead of a NullPointerException
+      // from the casts below, and before an output column is allocated.
+      throw new HiveException("Unsupported null constant of type " + type);
+    }
+    String colVectorType = this.getOutputColType(type, "constant");
+    int outCol = ocm.allocateOutputColumn(colVectorType);
+    if (type.equalsIgnoreCase("long") || type.equalsIgnoreCase("int") ||
+        type.equalsIgnoreCase("short") || type.equalsIgnoreCase("byte")) {
+      return new ConstantVectorExpression(outCol, ((Number) value).longValue());
+    } else if (type.equalsIgnoreCase("double") || type.equalsIgnoreCase("float")) {
+      return new ConstantVectorExpression(outCol, ((Number) value).doubleValue());
+    } else if (type.equalsIgnoreCase("string")) {
+      // Encode explicitly as UTF-8: the no-argument getBytes() uses the platform default
+      // charset, so the bytes would differ between JVMs for non-ASCII constants.
+      return new ConstantVectorExpression(outCol,
+          ((String) value).getBytes(java.nio.charset.Charset.forName("UTF-8")));
+    } else {
+      throw new HiveException("Unsupported constant type: " + type);
+    }
}
private VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList)
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java Fri May 10 08:20:29 2013
@@ -35,41 +35,41 @@ public class VectorizedRowBatch implemen
public ColumnVector[] cols; // a vector for each column
public int size; // number of rows that qualify (i.e. haven't been filtered out)
public int[] selected; // array of positions of selected values
-
+
/*
* If no filtering has been applied yet, selectedInUse is false,
* meaning that all rows qualify. If it is true, then the selected[] array
* records the offsets of qualifying rows.
*/
- public boolean selectedInUse;
-
+ public boolean selectedInUse;
+
// If this is true, then there is no data in the batch -- we have hit the end of input.
- public boolean endOfFile;
-
- /*
- * This number is carefully chosen to minimize overhead and typically allows
+ public boolean endOfFile;
+
+ /*
+ * This number is carefully chosen to minimize overhead and typically allows
* one VectorizedRowBatch to fit in cache.
*/
- public static final int DEFAULT_SIZE = 1024;
+ public static final int DEFAULT_SIZE = 1024;
- private Writable[] writableRow;
+ private final Writable[] writableRow;
private int rowIteratorIndex = 0;
- /**
+ /**
* Return a batch with the specified number of columns.
* This is the standard constructor -- all batches should be the same size
- *
+ *
* @param numCols the number of columns to include in the batch
*/
public VectorizedRowBatch(int numCols) {
this(numCols, DEFAULT_SIZE);
}
-
+
/**
* Return a batch with the specified number of columns and rows.
* Only call this constructor directly for testing purposes.
* Batch size should normally always be defaultSize.
- *
+ *
* @param numCols the number of columns to include in the batch
* @param size the number of rows to include in the batch
*/
@@ -104,13 +104,13 @@ public class VectorizedRowBatch implemen
return writableRow;
}
- /**
+ /**
* Return count of qualifying rows.
- *
+ *
* @return number of rows that have not been filtered out
*/
public long count() {
- return size;
+ return size;
}
@Override
@@ -124,7 +124,11 @@ public class VectorizedRowBatch implemen
int i = selected[j];
int colIndex = 0;
for (ColumnVector cv : cols) {
- b.append(cv.getWritableObject(i).toString());
+ if (cv.isRepeating) {
+ b.append(cv.getWritableObject(0).toString());
+ } else {
+ b.append(cv.getWritableObject(i).toString());
+ }
colIndex++;
if (colIndex < cols.length) {
b.append('\u0001');
@@ -138,7 +142,11 @@ public class VectorizedRowBatch implemen
for (int i = 0; i < size; i++) {
int colIndex = 0;
for (ColumnVector cv : cols) {
- b.append(cv.getWritableObject(i).toString());
+ if (cv.isRepeating) {
+ b.append(cv.getWritableObject(0).toString());
+ } else {
+ b.append(cv.getWritableObject(i).toString());
+ }
colIndex++;
if (colIndex < cols.length) {
b.append('\u0001');
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java?rev=1480925&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java Fri May 10 08:20:29 2013
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Vector expression that projects a constant into its output column.
+ *
+ * <p>Each evaluation marks the output column vector as repeating and free of nulls and stores
+ * the constant in entry 0 only.
+ */
+public class ConstantVectorExpression extends VectorExpression {
+
+  /** Kind of column vector the constant is written to. */
+  private enum Type {
+    LONG,
+    DOUBLE,
+    BYTES
+  }
+
+  private final int outputColumn;
+  private final String typeString;
+  private final Type type;
+
+  // Only the field matching the selected Type carries the constant; the others keep defaults.
+  private long longValue = 0;
+  private double doubleValue = 0;
+  private byte[] bytesValue = null;
+  private int bytesValueLength = 0;
+
+  /**
+   * Records the output column and type name without setting a value.
+   * "string" selects a bytes constant, "double" a double constant, and any other name a long
+   * constant; names are compared ignoring case.
+   *
+   * @param outputColumn index of the batch column this expression writes to
+   * @param typeString type name later reported by {@link #getOutputType()}
+   */
+  ConstantVectorExpression(int outputColumn, String typeString) {
+    this.outputColumn = outputColumn;
+    this.typeString = typeString;
+    this.type = kindFor(typeString);
+  }
+
+  /** Creates a long constant; the reported output type is "long". */
+  public ConstantVectorExpression(int outputColumn, long value) {
+    this(outputColumn, "long");
+    this.longValue = value;
+  }
+
+  /** Creates a double constant; the reported output type is "double". */
+  public ConstantVectorExpression(int outputColumn, double value) {
+    this(outputColumn, "double");
+    this.doubleValue = value;
+  }
+
+  /**
+   * Creates a bytes constant; the reported output type is "string".
+   * The array is kept by reference, not copied.
+   */
+  public ConstantVectorExpression(int outputColumn, byte[] value) {
+    this(outputColumn, "string");
+    this.bytesValue = value;
+    this.bytesValueLength = value.length;
+  }
+
+  /** Maps a type name to the kind of column vector used for it. */
+  private static Type kindFor(String typeName) {
+    if ("string".equalsIgnoreCase(typeName)) {
+      return Type.BYTES;
+    }
+    return "double".equalsIgnoreCase(typeName) ? Type.DOUBLE : Type.LONG;
+  }
+
+  /**
+   * Writes the constant into entry 0 of the output column of the given batch and flags that
+   * column as repeating with no nulls.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch vrg) {
+    switch (type) {
+      case LONG: {
+        LongColumnVector out = (LongColumnVector) vrg.cols[outputColumn];
+        out.noNulls = true;
+        out.isRepeating = true;
+        out.vector[0] = longValue;
+        break;
+      }
+      case DOUBLE: {
+        DoubleColumnVector out = (DoubleColumnVector) vrg.cols[outputColumn];
+        out.noNulls = true;
+        out.isRepeating = true;
+        out.vector[0] = doubleValue;
+        break;
+      }
+      case BYTES: {
+        BytesColumnVector out = (BytesColumnVector) vrg.cols[outputColumn];
+        out.noNulls = true;
+        out.isRepeating = true;
+        out.setRef(0, bytesValue, 0, bytesValueLength);
+        break;
+      }
+    }
+  }
+
+  /** Returns the index of the column this expression writes to. */
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  /** Returns the type name given at construction. */
+  @Override
+  public String getOutputType() {
+    return typeString;
+  }
+}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java Fri May 10 08:20:29 2013
@@ -24,6 +24,7 @@ import java.util.ArrayList;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.io.NullWritable;
@@ -36,7 +37,7 @@ import org.apache.hadoop.mapred.Reporter
public class CommonOrcInputFormat extends FileInputFormat<NullWritable, Writable>
- implements InputFormatChecker {
+ implements InputFormatChecker, VectorizedInputFormatInterface {
OrcInputFormat oif = new OrcInputFormat();
VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat();
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java Fri May 10 08:20:29 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
@@ -42,7 +43,7 @@ import org.apache.hadoop.mapred.Reporter
* A MapReduce/Hive input format for ORC files.
*/
public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
- implements InputFormatChecker {
+ implements InputFormatChecker, VectorizedInputFormatInterface {
private static class VectorizedOrcRecordReader
implements RecordReader<NullWritable, VectorizedRowBatch> {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcSerde.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcSerde.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcSerde.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcSerde.java Fri May 10 08:20:29 2013
@@ -56,7 +56,12 @@ public class VectorizedOrcSerde extends
index = i;
}
for (int k = 0; k < batch.numCols; k++) {
- Writable w = batch.cols[k].getWritableObject(index);
+ Writable w;
+ if (batch.cols[k].isRepeating) {
+ w = batch.cols[k].getWritableObject(0);
+ } else {
+ w = batch.cols[k].getWritableObject(index);
+ }
ost.setFieldValue(k, w);
}
OrcSerdeRow row = (OrcSerdeRow) orcRowArray[i];
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1480925&r1=1480924&r2=1480925&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java Fri May 10 08:20:29 2013
@@ -14,6 +14,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColMultiplyLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColSubtractLongColumn;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -30,7 +31,7 @@ import org.junit.Test;
public class TestVectorizationContext {
@Test
- public void testArithmeticExpressionVectorization() {
+ public void testArithmeticExpressionVectorization() throws HiveException {
/**
* Create original expression tree for following
* (plus (minus (plus col1 col2) col3) (multiply col4 (mod col5 col6)) )
@@ -119,7 +120,7 @@ public class TestVectorizationContext {
}
@Test
- public void testStringFilterExpressions() {
+ public void testStringFilterExpressions() throws HiveException {
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java?rev=1480925&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java Fri May 10 08:20:29 2013
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Test;
+
+/**
+ * Checks that {@link ConstantVectorExpression} marks its output column as repeating and stores
+ * the constant in entry 0 for long, double and bytes constants.
+ */
+public class TestConstantVectorExpression {
+
+  @Test
+  public void testConstantExpression() {
+    final int rowCount = 20;
+
+    // Column 0 of the generated batch is used as the long output; columns 1 and 2 are
+    // replaced with a double and a bytes vector so each constant kind has a matching target.
+    VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(rowCount, 3, 0);
+    LongColumnVector longCol = (LongColumnVector) batch.cols[0];
+    DoubleColumnVector doubleCol = new DoubleColumnVector(rowCount);
+    BytesColumnVector bytesCol = new BytesColumnVector(rowCount);
+    batch.cols[1] = doubleCol;
+    batch.cols[2] = bytesCol;
+
+    new ConstantVectorExpression(0, 17).evaluate(batch);
+    new ConstantVectorExpression(1, 17.34).evaluate(batch);
+    new ConstantVectorExpression(2, "alpha".getBytes()).evaluate(batch);
+
+    // Every output column must be flagged as repeating ...
+    assertTrue(longCol.isRepeating);
+    assertTrue(doubleCol.isRepeating);
+    assertTrue(bytesCol.isRepeating);
+
+    // ... and carry the constant in its first entry.
+    assertEquals(17, longCol.vector[0]);
+    assertTrue(doubleCol.vector[0] == 17.34);
+    assertTrue(Arrays.equals("alpha".getBytes(), bytesCol.vector[0]));
+  }
+
+}