You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2012/03/23 03:38:12 UTC

svn commit: r1304167 - in /hive/trunk: hbase-handler/src/java/org/apache/hadoop/hive/hbase/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/...

Author: hashutosh
Date: Fri Mar 23 02:38:12 2012
New Revision: 1304167

URL: http://svn.apache.org/viewvc?rev=1304167&view=rev
Log:
HIVE-2773: HiveStorageHandler.configureTableJobProperties() should let the handler know whether it is being configured for input or output (Francis Liu via Ashutosh Chauhan)

Modified:
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -255,6 +255,19 @@ public class HBaseStorageHandler extends
   }
 
   @Override
+  public void configureInputJobProperties(
+    TableDesc tableDesc,
+    Map<String, String> jobProperties) {
+      configureTableJobProperties(tableDesc, jobProperties);
+  }
+
+  @Override
+  public void configureOutputJobProperties(
+    TableDesc tableDesc,
+    Map<String, String> jobProperties) {
+      configureTableJobProperties(tableDesc, jobProperties);
+  }
+
   public void configureTableJobProperties(
     TableDesc tableDesc,
     Map<String, String> jobProperties) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Fri Mar 23 02:38:12 2012
@@ -762,7 +762,7 @@ public class FileSinkOperator extends Te
 
   @Override
   public void augmentPlan() {
-    PlanUtils.configureTableJobPropertiesForStorageHandler(
+    PlanUtils.configureOutputJobPropertiesForStorageHandler(
         getConf().getTableInfo());
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -68,14 +68,25 @@ public class DefaultStorageHandler imple
      return new DefaultHiveAuthorizationProvider();
   }
 
+   @Override
+  public void configureInputJobProperties(TableDesc tableDesc,
+                                          Map<String, String> jobProperties) {
+    // do nothing by default
+  }
+
   @Override
-  public void configureTableJobProperties(
-    TableDesc tableDesc,
-    Map<String, String> jobProperties) {
+  public void configureOutputJobProperties(TableDesc tableDesc,
+                                           Map<String, String> jobProperties) {
     // do nothing by default
   }
 
   @Override
+  public void configureTableJobProperties(TableDesc tableDesc,
+                                          Map<String, String> jobProperties) {
+    //do nothing by default
+  }
+
+  @Override
   public Configuration getConf() {
     return conf;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -78,6 +78,49 @@ public interface HiveStorageHandler exte
     throws HiveException;
 
   /**
+   * This method is called to give the StorageHandler the chance
+   * to populate the JobContext.getConfiguration() with properties that
+   * may be needed by the handler's bundled artifacts (e.g. InputFormat, SerDe).
+   * Key-value pairs passed into jobProperties are guaranteed to be set in the job's
+   * configuration object. Users can retrieve "context" information from tableDesc.
+   * Users should avoid mutating tableDesc and only make changes in jobProperties.
+   * This method is expected to be idempotent such that a job called with the
+   * same tableDesc values should return the same key-value pairs in jobProperties.
+   * Any external state set by this method should remain the same if this method is
+   * called again. It is up to the user to determine how best to guarantee this invariant.
+   *
+   * This method in particular is to create a configuration for input.
+   * @param tableDesc descriptor for the table being accessed
+   * @param jobProperties receives properties copied or transformed
+   * from the table properties
+   */
+  public abstract void configureInputJobProperties(TableDesc tableDesc,
+    Map<String, String> jobProperties);
+
+  /**
+   * This method is called to give the StorageHandler the chance
+   * to populate the JobContext.getConfiguration() with properties that
+   * may be needed by the handler's bundled artifacts (e.g. InputFormat, SerDe).
+   * Key-value pairs passed into jobProperties are guaranteed to be set in the job's
+   * configuration object. Users can retrieve "context" information from tableDesc.
+   * Users should avoid mutating tableDesc and only make changes in jobProperties.
+   * This method is expected to be idempotent such that a job called with the
+   * same tableDesc values should return the same key-value pairs in jobProperties.
+   * Any external state set by this method should remain the same if this method is
+   * called again. It is up to the user to determine how best to guarantee this invariant.
+   *
+   * This method in particular is to create a configuration for output.
+   * @param tableDesc descriptor for the table being accessed
+   * @param jobProperties receives properties copied or transformed
+   * from the table properties
+   */
+  public abstract void configureOutputJobProperties(TableDesc tableDesc,
+    Map<String, String> jobProperties);
+
+  /**
+   * Deprecated: use the configureInputJobProperties/configureOutputJobProperties
+   * methods instead.
+   *
    * Configures properties for a job based on the definition of the
    * source or target table it accesses.
    *
@@ -86,6 +129,7 @@ public interface HiveStorageHandler exte
    * @param jobProperties receives properties copied or transformed
    * from the table properties
    */
+  @Deprecated
   public void configureTableJobProperties(
     TableDesc tableDesc,
     Map<String, String> jobProperties);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Mar 23 02:38:12 2012
@@ -7028,7 +7028,7 @@ public class SemanticAnalyzer extends Ba
 
       if (noMapRed) {
         if (fetch.getTblDesc() != null) {
-          PlanUtils.configureTableJobPropertiesForStorageHandler(
+          PlanUtils.configureInputJobPropertiesForStorageHandler(
             fetch.getTblDesc());
         } else if ( (fetch.getPartDesc() != null) && (!fetch.getPartDesc().isEmpty())){
             PartitionDesc pd0 = fetch.getPartDesc().get(0);
@@ -7036,7 +7036,7 @@ public class SemanticAnalyzer extends Ba
             if ((td != null)&&(td.getProperties() != null)
                 && td.getProperties().containsKey(
                     org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)){
-              PlanUtils.configureTableJobPropertiesForStorageHandler(td);
+              PlanUtils.configureInputJobPropertiesForStorageHandler(td);
             }
         }
         fetchTask = (FetchTask) TaskFactory.get(fetch, conf);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java Fri Mar 23 02:38:12 2012
@@ -129,7 +129,7 @@ public class MapredLocalWork implements 
       if (fetchWork.getTblDesc() == null) {
         continue;
       }
-      PlanUtils.configureTableJobPropertiesForStorageHandler(
+      PlanUtils.configureInputJobPropertiesForStorageHandler(
         fetchWork.getTblDesc());
     }
   }
@@ -208,7 +208,7 @@ public class MapredLocalWork implements 
 
     private String getBaseFileName (String path) {
       try {
-	return ((new Path(path)).getName());
+        return ((new Path(path)).getName());
       } catch (Exception ex) {
         // This could be due to either URI syntax error or File constructor
         // illegal arg; we don't really care which one it is.

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java Fri Mar 23 02:38:12 2012
@@ -266,7 +266,7 @@ public class PartitionDesc implements Se
    *          URI to the partition file
    */
   void deriveBaseFileName(String path) {
-    PlanUtils.configureTableJobPropertiesForStorageHandler(tableDesc);
+    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
 
     if (path == null) {
       return;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Fri Mar 23 02:38:12 2012
@@ -657,11 +657,25 @@ public final class PlanUtils {
 
   /**
    * Loads the storage handler (if one exists) for the given table
-   * and invokes {@link HiveStorageHandler#configureTableJobProperties}.
+   * and invokes {@link HiveStorageHandler#configureInputJobProperties(TableDesc, java.util.Map)}.
    *
    * @param tableDesc table descriptor
    */
-  public static void configureTableJobPropertiesForStorageHandler(
+  public static void configureInputJobPropertiesForStorageHandler(TableDesc tableDesc) {
+      configureJobPropertiesForStorageHandler(true,tableDesc);
+  }
+
+  /**
+   * Loads the storage handler (if one exists) for the given table
+   * and invokes {@link HiveStorageHandler#configureOutputJobProperties(TableDesc, java.util.Map)}.
+   *
+   * @param tableDesc table descriptor
+   */
+  public static void configureOutputJobPropertiesForStorageHandler(TableDesc tableDesc) {
+      configureJobPropertiesForStorageHandler(false,tableDesc);
+  }
+
+  private static void configureJobPropertiesForStorageHandler(boolean input,
     TableDesc tableDesc) {
 
     if (tableDesc == null) {
@@ -676,9 +690,28 @@ public final class PlanUtils {
             org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE));
       if (storageHandler != null) {
         Map<String, String> jobProperties = new LinkedHashMap<String, String>();
-        storageHandler.configureTableJobProperties(
-          tableDesc,
-          jobProperties);
+        if(input) {
+            try {
+                storageHandler.configureInputJobProperties(
+                  tableDesc,
+                  jobProperties);
+            } catch(AbstractMethodError e) {
+                LOG.debug("configureInputJobProperties not found "+
+                    "using configureTableJobProperties",e);
+                storageHandler.configureTableJobProperties(tableDesc, jobProperties);
+            }
+        }
+        else {
+            try {
+                storageHandler.configureOutputJobProperties(
+                  tableDesc,
+                  jobProperties);
+            } catch(AbstractMethodError e) {
+                LOG.debug("configureOutputJobProperties not found"+
+                    "using configureTableJobProperties",e);
+                storageHandler.configureTableJobProperties(tableDesc, jobProperties);
+            }
+        }
         // Job properties are only relevant for non-native tables, so
         // for native tables, leave it null to avoid cluttering up
         // plans.