You are viewing a plain-text version of this content; the canonical HTML version is available in the mailing-list archive.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/18 20:05:34 UTC

svn commit: r1646512 - in /hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql: exec/Utilities.java io/CombineHiveInputFormat.java

Author: xuefu
Date: Thu Dec 18 19:05:33 2014
New Revision: 1646512

URL: http://svn.apache.org/r1646512
Log:
HIVE-9127: Improve CombineHiveInputFormat.getSplit performance (Brock via Xuefu)

Modified:
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1646512&r1=1646511&r2=1646512&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Dec 18 19:05:33 2014
@@ -388,7 +388,6 @@ public final class Utilities {
           in = new InflaterInputStream(in);
         } else {
           LOG.info("Open file to read in plan: " + localPath);
-//          in = new FileInputStream(localPath.toUri().getPath());
           in = localPath.getFileSystem(conf).open(localPath);
         }
 
@@ -427,8 +426,9 @@ public final class Utilities {
       LOG.info("No plan file found: "+path);
       return null;
     } catch (Exception e) {
-      LOG.error("Failed to load plan: "+path, e);
-      throw new RuntimeException(e);
+      String msg = "Failed to load plan: " + path + ": " + e;
+      LOG.error(msg, e);
+      throw new RuntimeException(msg, e);
     } finally {
       if (in != null) {
         try {
@@ -710,11 +710,11 @@ public final class Utilities {
 
       // Cache the plan in this process
       gWorkMap.put(planPath, w);
-
       return planPath;
     } catch (Exception e) {
-      e.printStackTrace();
-      throw new RuntimeException(e);
+      String msg = "Error caching " + name + ": " + e;
+      LOG.error(msg, e);
+      throw new RuntimeException(msg, e);
     }
   }
 

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1646512&r1=1646511&r2=1646512&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Dec 18 19:05:33 2014
@@ -82,8 +82,9 @@ public class CombineHiveInputFormat<K ex
    */
   public static class CombineHiveInputSplit extends InputSplitShim {
 
-    String inputFormatClassName;
-    CombineFileSplit inputSplitShim;
+    private String inputFormatClassName;
+    private CombineFileSplit inputSplitShim;
+    private Map<String, PartitionDesc> pathToPartitionInfo;
 
     public CombineHiveInputSplit() throws IOException {
       this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -93,20 +94,25 @@ public class CombineHiveInputFormat<K ex
     public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException {
       this(inputSplitShim.getJob(), inputSplitShim);
     }
-
     public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim)
         throws IOException {
+      this(job, inputSplitShim, null);
+    }
+    public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
+        Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
       this.inputSplitShim = inputSplitShim;
+      this.pathToPartitionInfo = pathToPartitionInfo;
       if (job != null) {
-        Map<String, PartitionDesc> pathToPartitionInfo = Utilities
-            .getMapWork(job).getPathToPartitionInfo();
+        if (this.pathToPartitionInfo == null) {
+          this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+        }
 
         // extract all the inputFormatClass names for each chunk in the
         // CombinedSplit.
         Path[] ipaths = inputSplitShim.getPaths();
         if (ipaths.length > 0) {
           PartitionDesc part = HiveFileFormatUtils
-              .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+              .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
                   ipaths[0], IOPrepareCache.get().getPartitionDescMap());
           inputFormatClassName = part.getInputFileFormatClass().getName();
         }
@@ -215,8 +221,9 @@ public class CombineHiveInputFormat<K ex
       inputSplitShim.write(out);
 
       if (inputFormatClassName == null) {
-        Map<String, PartitionDesc> pathToPartitionInfo = Utilities
-            .getMapWork(getJob()).getPathToPartitionInfo();
+        if (pathToPartitionInfo == null) {
+          pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
+        }
 
         // extract all the inputFormatClass names for each chunk in the
         // CombinedSplit.
@@ -268,8 +275,8 @@ public class CombineHiveInputFormat<K ex
   /**
    * Create Hive splits based on CombineFileSplit.
    */
-  private InputSplit[] getCombineSplits(JobConf job,
-                                        int numSplits) throws IOException {
+  private InputSplit[] getCombineSplits(JobConf job, int numSplits, Map<String, PartitionDesc> pathToPartitionInfo)
+      throws IOException {
     PerfLogger perfLogger = PerfLogger.getPerfLogger();
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
     init(job);
@@ -438,7 +445,7 @@ public class CombineHiveInputFormat<K ex
     }
 
     for (CombineFileSplit is : iss) {
-      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
+      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
       result.add(csplit);
     }
 
@@ -505,7 +512,8 @@ public class CombineHiveInputFormat<K ex
     if (combinablePaths.size() > 0) {
       FileInputFormat.setInputPaths(job, combinablePaths.toArray
           (new Path[combinablePaths.size()]));
-      InputSplit[] splits = getCombineSplits(job, numSplits);
+      Map<String, PartitionDesc> pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+      InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
       for (InputSplit split : splits) {
         result.add(split);
       }