You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/07 23:26:18 UTC

svn commit: r1490837 - in /hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec: ExecDriver.java Operator.java vector/VectorMapOperator.java

Author: hashutosh
Date: Fri Jun  7 21:26:18 2013
New Revision: 1490837

URL: http://svn.apache.org/r1490837
Log:
HIVE-4599 : VectorGroupByOperator steals the non-vectorized children and crashes query if vectorization fails (Jitendra Nath Pandey via Ashutosh Chauhan)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1490837&r1=1490836&r2=1490837&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Fri Jun  7 21:26:18 2013
@@ -285,7 +285,7 @@ public class ExecDriver extends Task<Map
 
     if (vectorPath) {
       if (validateVectorPath()) {
-        System.out.println("Going down the vectorization path");
+        LOG.debug("Going down the vectorization path");
         job.setMapperClass(VectorExecMapper.class);
       } else {
         //fall back to non-vector mode
@@ -533,8 +533,7 @@ public class ExecDriver extends Task<Map
   }
 
   private boolean validateVectorPath() {
-    System.out.println("Validating if vectorized execution is applicable");
-    LOG.info("Validating if vectorized execution is applicable");
+    LOG.debug("Validating if vectorized execution is applicable");
     MapredWork thePlan = this.getWork();
 
     for (String path : thePlan.getPathToPartitionInfo().keySet()) {
@@ -542,9 +541,7 @@ public class ExecDriver extends Task<Map
       List<Class<?>> interfaceList =
           Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
       if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
-        System.out.println("Input format: " + pd.getInputFileFormatClassName()
-            + ", doesn't provide vectorized input");
-        LOG.info("Input format: " + pd.getInputFileFormatClassName()
+        LOG.debug("Input format: " + pd.getInputFileFormatClassName()
             + ", doesn't provide vectorized input");
         return false;
       }
@@ -559,21 +556,18 @@ public class ExecDriver extends Task<Map
         try {
           vectorOp = VectorMapOperator.vectorizeOperator(op, vc);
         } catch (Exception e) {
-          LOG.info("Cannot vectorize the plan", e);
-          System.out.println("Cannot vectorize the plan: "+ e);
+          LOG.debug("Cannot vectorize the plan", e);
           return false;
         }
         if (vectorOp == null) {
-          LOG.info("Cannot vectorize the plan");
-          System.out.println("Cannot vectorize the plan");
+          LOG.debug("Cannot vectorize the plan");
           return false;
         }
         //verify the expressions contained in the operators
         try {
           validateVectorOperator(vectorOp);
         } catch (HiveException e) {
-          LOG.info("Cannot vectorize the plan", e);
-          System.out.println("Cannot vectorize the plan: "+e.getMessage());
+          LOG.debug("Cannot vectorize the plan", e);
           return false;
         }
       }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1490837&r1=1490836&r2=1490837&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Fri Jun  7 21:26:18 2013
@@ -1414,6 +1414,60 @@ public abstract class Operator<T extends
     return ret;
   }
 
+  /**
+   * Creates a copy of this operator alone, built from a clone of its descriptor.
+   * No children or parents of this operator are handed to the factory.
+   * @return Cloned operator
+   * @throws CloneNotSupportedException
+   */
+  public Operator<? extends OperatorDesc> cloneOp() throws CloneNotSupportedException {
+    // The new operator is built from a clone of the descriptor, not from conf itself
+    T clonedConf = (T) conf.clone();
+    // getSchema() is passed through as-is; only the descriptor is cloned
+    return (Operator<? extends OperatorDesc>) OperatorFactory.getAndMakeChild(
+        clonedConf, getSchema());
+  }
+
+  /**
+   * Recursively clones this operator together with the whole subtree of its children.
+   * Every cloned child is attached to the returned clone; its parent list mirrors the
+   * original child's, with this operator replaced by the clone. Parents of this operator
+   * are not updated: they keep pointing to the original operator as their child.
+   * @return Cloned operator
+   * @throws CloneNotSupportedException
+   */
+  public Operator<? extends OperatorDesc> cloneRecursiveChildren()
+      throws CloneNotSupportedException {
+    Operator<? extends OperatorDesc> newOp = this.cloneOp();
+    newOp.setParentOperators(this.parentOperators);
+    if (this.getChildOperators() == null) {
+      newOp.setChildOperators(null);
+      return newOp;
+    }
+    List<Operator<? extends OperatorDesc>> newChildren =
+        new ArrayList<Operator<? extends OperatorDesc>>();
+
+    for (Operator<? extends OperatorDesc> childOp : this.getChildOperators()) {
+      // Parent list for the cloned child: the clone stands in for this operator
+      List<Operator<? extends OperatorDesc>> parentList =
+          new ArrayList<Operator<? extends OperatorDesc>>();
+      for (Operator<? extends OperatorDesc> parent : childOp.getParentOperators()) {
+        if (parent.equals(this)) {
+          parentList.add(newOp);
+        } else {
+          parentList.add(parent);
+        }
+      }
+      Operator<? extends OperatorDesc> clonedChildOp = childOp.cloneRecursiveChildren();
+      clonedChildOp.setParentOperators(parentList);
+      // Attach the cloned child; otherwise the clone is returned with no children
+      newChildren.add(clonedChildOp);
+    }
+    newOp.setChildOperators(newChildren);
+    return newOp;
+  }
+
+
   /*
    * True only for operators which produce atmost 1 output row per input
    * row to it. This will allow the output column names to be directly

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java?rev=1490837&r1=1490836&r2=1490837&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java Fri Jun  7 21:26:18 2013
@@ -503,7 +503,7 @@ public class VectorMapOperator extends O
             op.getConf());
         break;
       case TABLESCAN:
-        vectorOp = op.clone();
+        vectorOp = op.cloneOp();
         break;
       case REDUCESINK:
         vectorOp = new VectorReduceSinkOperator(vectorizationContext, op.getConf());
@@ -530,14 +530,18 @@ public class VectorMapOperator extends O
         vectorOp.setChildOperators(vectorizedChildren);
       }
     } else {
-      // transfer the row-mode clients to the vectorized op parent
-      List<Operator<? extends OperatorDesc>> children = op.getChildOperators();
-      if (children != null && !children.isEmpty()) {
+      // transfer the row-mode children to the vectorized op parent
+      List<Operator<? extends OperatorDesc>> children =
+          new ArrayList<Operator<? extends OperatorDesc>>();
+
+      if (op.getChildOperators() != null && !op.getChildOperators().isEmpty()) {
         List<Operator<? extends OperatorDesc>> parentList =
             new ArrayList<Operator<? extends OperatorDesc>>();
         parentList.add(vectorOp);
-        for (Operator<? extends OperatorDesc> childOp : children) {
-          childOp.setParentOperators(parentList);
+        for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+          Operator<? extends OperatorDesc> clonedOp = childOp.cloneRecursiveChildren();
+          clonedOp.setParentOperators(parentList);
+          children.add(clonedOp);
         }
         vectorOp.setChildOperators(children);
       }