You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/10/10 17:55:01 UTC
svn commit: r1531033 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/optimizer/physical/
test/org/apache/hadoop/hive/ql/exec/vector/ test/queries/clientpositive/ t...
Author: hashutosh
Date: Thu Oct 10 15:55:01 2013
New Revision: 1531033
URL: http://svn.apache.org/r1531033
Log:
HIVE-4846 : Implement Vectorized Limit Operator (Jitendra Nath Pandey via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java
hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q
hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1531033&r1=1531032&r2=1531033&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Thu Oct 10 15:55:01 2013
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
@@ -121,6 +122,7 @@ public final class OperatorFactory {
VectorReduceSinkOperator.class));
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
+ vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
}
public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java Thu Oct 10 15:55:01 2013
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Vectorized counterpart of {@link LimitOperator}. Instead of counting individual rows it
+ * truncates each incoming {@link VectorizedRowBatch} so that the total number of rows
+ * forwarded never exceeds the configured limit.
+ */
+public class VectorLimitOperator extends LimitOperator {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorLimitOperator() {
+    super();
+  }
+
+  /**
+   * Creates the operator from its plan descriptor.
+   *
+   * @param vContext vectorization context; not used by this operator
+   * @param conf operator descriptor, which must be a {@link LimitDesc}
+   */
+  public VectorLimitOperator(VectorizationContext vContext, OperatorDesc conf) {
+    this.conf = (LimitDesc) conf;
+  }
+
+  /**
+   * Forwards at most the remaining number of rows allowed by the limit and marks the
+   * operator as done when a batch arrives after the limit has already been reached.
+   *
+   * @param row the incoming {@link VectorizedRowBatch}; its size is truncated in place
+   * @param tag index into {@code inputObjInspectors} for the input this batch arrived on
+   * @throws HiveException propagated from forwarding the batch to the child operators
+   */
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    VectorizedRowBatch batch = (VectorizedRowBatch) row;
+
+    if (currCount >= limit) {
+      setDone(true);
+      return;
+    }
+
+    // Capture the forwarded row count before handing the batch downstream: the batch is
+    // shared and batch.size is a mutable field, so reading it after forward() could
+    // count something other than what this operator actually let through.
+    int forwarded = Math.min(batch.size, limit - currCount);
+    batch.size = forwarded;
+    forward(row, inputObjInspectors[tag]);
+    currCount += forwarded;
+  }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1531033&r1=1531032&r2=1531033&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Thu Oct 10 15:55:01 2013
@@ -452,6 +452,7 @@ public class Vectorizer implements Physi
break;
case FILESINK:
case TABLESCAN:
+ case LIMIT:
ret = true;
break;
default:
@@ -578,6 +579,7 @@ public class Vectorizer implements Physi
case SELECT:
case FILESINK:
case REDUCESINK:
+ case LIMIT:
vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
break;
default:
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java Thu Oct 10 15:55:01 2013
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.junit.Test;
+
+/**
+ * Unit test for the vectorized LIMIT operator.
+ */
+public class TestVectorLimitOperator {
+
+  @Test
+  public void testLimitLessThanBatchSize() throws HiveException {
+    validateVectorLimitOperator(2, 5, 2);
+  }
+
+  @Test
+  public void testLimitGreaterThanBatchSize() throws HiveException {
+    validateVectorLimitOperator(100, 3, 3);
+  }
+
+  @Test
+  public void testLimitWithZeroBatchSize() throws HiveException {
+    validateVectorLimitOperator(5, 0, 0);
+  }
+
+  /**
+   * Pushes the first batch of a small fixed data set through a fresh
+   * {@link VectorLimitOperator} and checks the batch size left behind.
+   *
+   * @param limit row limit the operator is configured with
+   * @param batchSize batch size handed to the fake batch source
+   * @param expectedBatchSize size the batch must have after processing
+   * @throws HiveException propagated from initializing the operator or processing the batch
+   */
+  private void validateVectorLimitOperator(int limit, int batchSize, int expectedBatchSize)
+      throws HiveException {
+
+    @SuppressWarnings("unchecked")
+    FakeVectorRowBatchFromObjectIterables frboi = new FakeVectorRowBatchFromObjectIterables(
+        batchSize,
+        new String[] {"tinyint", "double"},
+        Arrays.asList(new Object[] {1, 2, 3, 4}),
+        Arrays.asList(new Object[] {323.0, 34.5, null, 89.3}));
+
+    // Get next batch
+    VectorizedRowBatch vrb = frboi.produceNextBatch();
+
+    // Create limit desc with limit value
+    LimitDesc ld = new LimitDesc(limit);
+    VectorLimitOperator lo = new VectorLimitOperator(null, ld);
+    lo.initialize(new Configuration(), null);
+
+    // Process the batch
+    lo.processOp(vrb, 0);
+
+    // Verify batch size (JUnit argument order is expected, then actual)
+    Assert.assertEquals(expectedBatchSize, vrb.size);
+  }
+}
+
Added: hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q Thu Oct 10 15:55:01 2013
@@ -0,0 +1,3 @@
+SET hive.vectorized.execution.enabled=true;
+explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
Added: hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out Thu Oct 10 15:55:01 2013
@@ -0,0 +1,64 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_WHERE (and (< (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble)) (> (TOK_TABLE_OR_COL cint) 0))) (TOK_LIMIT 7)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ alltypesorc
+ TableScan
+ alias: alltypesorc
+ Filter Operator
+ predicate:
+ expr: ((cbigint < cdouble) and (cint > 0))
+ type: boolean
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: cbigint
+ type: bigint
+ expr: cdouble
+ type: double
+ outputColumnNames: _col0, _col1
+ Vectorized execution: true
+ Limit
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 7
+
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756 1839.0
+-1887561756 -10011.0
+-1887561756 -13877.0
+-1887561756 10361.0
+-1887561756 -8881.0
+-1887561756 -2281.0
+-1887561756 9531.0