You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/10/10 17:55:01 UTC

svn commit: r1531033 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/org/apache/hadoop/hive/ql/exec/vector/ test/queries/clientpositive/ t...

Author: hashutosh
Date: Thu Oct 10 15:55:01 2013
New Revision: 1531033

URL: http://svn.apache.org/r1531033
Log:
HIVE-4846 : Implement Vectorized Limit Operator (Jitendra Nath Pandey via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q
    hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1531033&r1=1531032&r2=1531033&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Thu Oct 10 15:55:01 2013
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
@@ -121,6 +122,7 @@ public final class OperatorFactory {
         VectorReduceSinkOperator.class));
     vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
     vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
+    vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
   }
 
   public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java Thu Oct 10 15:55:01 2013
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Vectorized implementation of the limit operator. Caps the total number of
+ * rows passed on to child operators by trimming each incoming
+ * {@link VectorizedRowBatch} to the rows still allowed under the limit.
+ */
+public class VectorLimitOperator extends LimitOperator {
+
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * No-argument constructor; simply delegates to the parent constructor.
+   */
+  public VectorLimitOperator() {
+    super();
+  }
+
+  /**
+   * Creates the operator from its plan descriptor.
+   *
+   * @param vContext vectorization context; not used by this operator
+   * @param conf operator descriptor, which must be a {@link LimitDesc}
+   */
+  public VectorLimitOperator(VectorizationContext vContext, OperatorDesc conf) {
+    this.conf = (LimitDesc) conf;
+  }
+
+  /**
+   * Forwards at most the remaining number of rows allowed by the limit and
+   * marks the operator as done when a batch arrives after the limit has
+   * already been reached.
+   *
+   * @param row the incoming batch, which must be a {@link VectorizedRowBatch}
+   * @param tag index into the input object inspectors for this batch
+   * @throws HiveException if thrown while forwarding the batch
+   */
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    VectorizedRowBatch batch = (VectorizedRowBatch) row;
+
+    if (currCount >= limit) {
+      setDone(true);
+      return;
+    }
+
+    // Capture the forwarded row count before calling forward(): the batch is
+    // shared and resized in place (as done right here), so reading batch.size
+    // afterwards could pick up a value changed further down the pipeline.
+    int rowsToForward = Math.min(batch.size, limit - currCount);
+    batch.size = rowsToForward;
+    forward(row, inputObjInspectors[tag]);
+    currCount += rowsToForward;
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1531033&r1=1531032&r2=1531033&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Thu Oct 10 15:55:01 2013
@@ -452,6 +452,7 @@ public class Vectorizer implements Physi
         break;
       case FILESINK:
       case TABLESCAN:
+      case LIMIT:
         ret = true;
         break;
       default:
@@ -578,6 +579,7 @@ public class Vectorizer implements Physi
       case SELECT:
       case FILESINK:
       case REDUCESINK:
+      case LIMIT:
         vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
         break;
       default:

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java Thu Oct 10 15:55:01 2013
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.junit.Test;
+
+/**
+ * Unit test for the vectorized LIMIT operator.
+ */
+public class TestVectorLimitOperator {
+
+  /** Limit (2) smaller than the batch size (5): the batch is trimmed to 2 rows. */
+  @Test
+  public void testLimitLessThanBatchSize() throws HiveException {
+    validateVectorLimitOperator(2, 5, 2);
+  }
+
+  /** Limit (100) larger than the batch size (3): the batch is expected to hold 3 rows. */
+  @Test
+  public void testLimitGreaterThanBatchSize() throws HiveException {
+    validateVectorLimitOperator(100, 3, 3);
+  }
+
+  /** Batch size of zero: the processed batch is expected to have size 0. */
+  @Test
+  public void testLimitWithZeroBatchSize() throws HiveException {
+    validateVectorLimitOperator(5, 0, 0);
+  }
+
+  /**
+   * Runs a single batch through a {@link VectorLimitOperator} and checks the
+   * batch size afterwards.
+   *
+   * @param limit row limit configured on the operator
+   * @param batchSize batch size handed to the fake batch source
+   * @param expectedBatchSize expected size of the batch after processing
+   * @throws HiveException if initializing or running the operator fails
+   */
+  private void validateVectorLimitOperator(int limit, int batchSize, int expectedBatchSize)
+      throws HiveException {
+
+    @SuppressWarnings("unchecked")
+    FakeVectorRowBatchFromObjectIterables frboi = new FakeVectorRowBatchFromObjectIterables(
+        batchSize,
+        new String[] {"tinyint", "double"},
+        Arrays.asList(new Object[] {1, 2, 3, 4}),
+        Arrays.asList(new Object[] {323.0, 34.5, null, 89.3}));
+
+    // Get next batch
+    VectorizedRowBatch vrb = frboi.produceNextBatch();
+
+    // Create limit desc with limit value
+    LimitDesc ld = new LimitDesc(limit);
+    VectorLimitOperator lo = new VectorLimitOperator(null, ld);
+    lo.initialize(new Configuration(), null);
+
+    // Process the batch
+    lo.processOp(vrb, 0);
+
+    // Verify batch size; JUnit's assertEquals takes (expected, actual)
+    Assert.assertEquals(expectedBatchSize, vrb.size);
+  }
+}
+

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_limit.q Thu Oct 10 15:55:01 2013
@@ -0,0 +1,3 @@
+SET hive.vectorized.execution.enabled=true;
+explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out?rev=1531033&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out Thu Oct 10 15:55:01 2013
@@ -0,0 +1,64 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_WHERE (and (< (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble)) (> (TOK_TABLE_OR_COL cint) 0))) (TOK_LIMIT 7)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        alltypesorc 
+          TableScan
+            alias: alltypesorc
+            Filter Operator
+              predicate:
+                  expr: ((cbigint < cdouble) and (cint > 0))
+                  type: boolean
+              Vectorized execution: true
+              Select Operator
+                expressions:
+                      expr: cbigint
+                      type: bigint
+                      expr: cdouble
+                      type: double
+                outputColumnNames: _col0, _col1
+                Vectorized execution: true
+                Limit
+                  Vectorized execution: true
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Vectorized execution: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 7
+
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756	1839.0
+-1887561756	-10011.0
+-1887561756	-13877.0
+-1887561756	10361.0
+-1887561756	-8881.0
+-1887561756	-2281.0
+-1887561756	9531.0