You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2012/03/23 03:38:12 UTC
svn commit: r1304167 - in /hive/trunk:
hbase-handler/src/java/org/apache/hadoop/hive/hbase/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/metadata/
ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/...
Author: hashutosh
Date: Fri Mar 23 02:38:12 2012
New Revision: 1304167
URL: http://svn.apache.org/viewvc?rev=1304167&view=rev
Log:
HIVE-2773: HiveStorageHandler.configureTableJobProperties() should let the handler know whether the configuration is for input or output (Francis Liu via Ashutosh Chauhan)
Modified:
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -255,6 +255,19 @@ public class HBaseStorageHandler extends
}
@Override
+ public void configureInputJobProperties(
+ TableDesc tableDesc,
+ Map<String, String> jobProperties) {
+ configureTableJobProperties(tableDesc, jobProperties);
+ }
+
+ @Override
+ public void configureOutputJobProperties(
+ TableDesc tableDesc,
+ Map<String, String> jobProperties) {
+ configureTableJobProperties(tableDesc, jobProperties);
+ }
+
public void configureTableJobProperties(
TableDesc tableDesc,
Map<String, String> jobProperties) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Fri Mar 23 02:38:12 2012
@@ -762,7 +762,7 @@ public class FileSinkOperator extends Te
@Override
public void augmentPlan() {
- PlanUtils.configureTableJobPropertiesForStorageHandler(
+ PlanUtils.configureOutputJobPropertiesForStorageHandler(
getConf().getTableInfo());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/DefaultStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -68,14 +68,25 @@ public class DefaultStorageHandler imple
return new DefaultHiveAuthorizationProvider();
}
+ @Override
+ public void configureInputJobProperties(TableDesc tableDesc,
+ Map<String, String> jobProperties) {
+ // do nothing by default
+ }
+
@Override
- public void configureTableJobProperties(
- TableDesc tableDesc,
- Map<String, String> jobProperties) {
+ public void configureOutputJobProperties(TableDesc tableDesc,
+ Map<String, String> jobProperties) {
// do nothing by default
}
@Override
+ public void configureTableJobProperties(TableDesc tableDesc,
+ Map<String, String> jobProperties) {
+ //do nothing by default
+ }
+
+ @Override
public Configuration getConf() {
return conf;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java Fri Mar 23 02:38:12 2012
@@ -78,6 +78,49 @@ public interface HiveStorageHandler exte
throws HiveException;
/**
+ * This method is called to allow the StorageHandlers the chance
+ * to populate the JobContext.getConfiguration() with properties that
+ * may be needed by the handler's bundled artifacts (e.g. InputFormat, SerDe, etc.).
+ * Key value pairs passed into jobProperties are guaranteed to be set in the job's
+ * configuration object. Users can retrieve "context" information from tableDesc.
+ * Users should avoid mutating tableDesc and only make changes in jobProperties.
+ * This method is expected to be idempotent such that a job called with the
+ * same tableDesc values should return the same key-value pairs in jobProperties.
+ * Any external state set by this method should remain the same if this method is
+ * called again. It is up to the user to determine how best to guarantee this invariant.
+ *
+ * This method in particular is to create a configuration for input.
+ * @param tableDesc descriptor for the table being accessed
+ * @param jobProperties receives properties copied or transformed
+ * from the table properties
+ */
+ public abstract void configureInputJobProperties(TableDesc tableDesc,
+ Map<String, String> jobProperties);
+
+ /**
+ * This method is called to allow the StorageHandlers the chance
+ * to populate the JobContext.getConfiguration() with properties that
+ * may be needed by the handler's bundled artifacts (e.g. InputFormat, SerDe, etc.).
+ * Key value pairs passed into jobProperties are guaranteed to be set in the job's
+ * configuration object. Users can retrieve "context" information from tableDesc.
+ * Users should avoid mutating tableDesc and only make changes in jobProperties.
+ * This method is expected to be idempotent such that a job called with the
+ * same tableDesc values should return the same key-value pairs in jobProperties.
+ * Any external state set by this method should remain the same if this method is
+ * called again. It is up to the user to determine how best to guarantee this invariant.
+ *
+ * This method in particular is to create a configuration for output.
+ * @param tableDesc descriptor for the table being accessed
+ * @param jobProperties receives properties copied or transformed
+ * from the table properties
+ */
+ public abstract void configureOutputJobProperties(TableDesc tableDesc,
+ Map<String, String> jobProperties);
+
+ /**
+ * Deprecated: use the configureInputJobProperties/configureOutputJobProperties
+ * methods instead.
+ *
* Configures properties for a job based on the definition of the
* source or target table it accesses.
*
@@ -86,6 +129,7 @@ public interface HiveStorageHandler exte
* @param jobProperties receives properties copied or transformed
* from the table properties
*/
+ @Deprecated
public void configureTableJobProperties(
TableDesc tableDesc,
Map<String, String> jobProperties);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Mar 23 02:38:12 2012
@@ -7028,7 +7028,7 @@ public class SemanticAnalyzer extends Ba
if (noMapRed) {
if (fetch.getTblDesc() != null) {
- PlanUtils.configureTableJobPropertiesForStorageHandler(
+ PlanUtils.configureInputJobPropertiesForStorageHandler(
fetch.getTblDesc());
} else if ( (fetch.getPartDesc() != null) && (!fetch.getPartDesc().isEmpty())){
PartitionDesc pd0 = fetch.getPartDesc().get(0);
@@ -7036,7 +7036,7 @@ public class SemanticAnalyzer extends Ba
if ((td != null)&&(td.getProperties() != null)
&& td.getProperties().containsKey(
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)){
- PlanUtils.configureTableJobPropertiesForStorageHandler(td);
+ PlanUtils.configureInputJobPropertiesForStorageHandler(td);
}
}
fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java Fri Mar 23 02:38:12 2012
@@ -129,7 +129,7 @@ public class MapredLocalWork implements
if (fetchWork.getTblDesc() == null) {
continue;
}
- PlanUtils.configureTableJobPropertiesForStorageHandler(
+ PlanUtils.configureInputJobPropertiesForStorageHandler(
fetchWork.getTblDesc());
}
}
@@ -208,7 +208,7 @@ public class MapredLocalWork implements
private String getBaseFileName (String path) {
try {
- return ((new Path(path)).getName());
+ return ((new Path(path)).getName());
} catch (Exception ex) {
// This could be due to either URI syntax error or File constructor
// illegal arg; we don't really care which one it is.
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java Fri Mar 23 02:38:12 2012
@@ -266,7 +266,7 @@ public class PartitionDesc implements Se
* URI to the partition file
*/
void deriveBaseFileName(String path) {
- PlanUtils.configureTableJobPropertiesForStorageHandler(tableDesc);
+ PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
if (path == null) {
return;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1304167&r1=1304166&r2=1304167&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Fri Mar 23 02:38:12 2012
@@ -657,11 +657,25 @@ public final class PlanUtils {
/**
* Loads the storage handler (if one exists) for the given table
- * and invokes {@link HiveStorageHandler#configureTableJobProperties}.
+ * and invokes {@link HiveStorageHandler#configureInputJobProperties(TableDesc, java.util.Map)}.
*
* @param tableDesc table descriptor
*/
- public static void configureTableJobPropertiesForStorageHandler(
+ public static void configureInputJobPropertiesForStorageHandler(TableDesc tableDesc) {
+ configureJobPropertiesForStorageHandler(true,tableDesc);
+ }
+
+ /**
+ * Loads the storage handler (if one exists) for the given table
+ * and invokes {@link HiveStorageHandler#configureOutputJobProperties(TableDesc, java.util.Map)}.
+ *
+ * @param tableDesc table descriptor
+ */
+ public static void configureOutputJobPropertiesForStorageHandler(TableDesc tableDesc) {
+ configureJobPropertiesForStorageHandler(false,tableDesc);
+ }
+
+ private static void configureJobPropertiesForStorageHandler(boolean input,
TableDesc tableDesc) {
if (tableDesc == null) {
@@ -676,9 +690,28 @@ public final class PlanUtils {
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE));
if (storageHandler != null) {
Map<String, String> jobProperties = new LinkedHashMap<String, String>();
- storageHandler.configureTableJobProperties(
- tableDesc,
- jobProperties);
+ if(input) {
+ try {
+ storageHandler.configureInputJobProperties(
+ tableDesc,
+ jobProperties);
+ } catch(AbstractMethodError e) {
+ LOG.debug("configureInputJobProperties not found "+
+ "using configureTableJobProperties",e);
+ storageHandler.configureTableJobProperties(tableDesc, jobProperties);
+ }
+ }
+ else {
+ try {
+ storageHandler.configureOutputJobProperties(
+ tableDesc,
+ jobProperties);
+ } catch(AbstractMethodError e) {
+ LOG.debug("configureOutputJobProperties not found"+
+ "using configureTableJobProperties",e);
+ storageHandler.configureTableJobProperties(tableDesc, jobProperties);
+ }
+ }
// Job properties are only relevant for non-native tables, so
// for native tables, leave it null to avoid cluttering up
// plans.