Posted to commits@oozie.apache.org by vi...@apache.org on 2012/08/15 20:54:51 UTC

svn commit: r1373568 - in /incubator/oozie/trunk: ./ core/src/main/java/org/apache/oozie/action/hadoop/ docs/src/site/twiki/

Author: virag
Date: Wed Aug 15 18:54:51 2012
New Revision: 1373568

URL: http://svn.apache.org/viewvc?rev=1373568&view=rev
Log:
OOZIE-889 Adding HCat credentials class for job conf (mona via virag)

Added:
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentialHelper.java
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentials.java
    incubator/oozie/trunk/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki
Modified:
    incubator/oozie/trunk/docs/src/site/twiki/index.twiki
    incubator/oozie/trunk/release-log.txt

Added: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentialHelper.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentialHelper.java?rev=1373568&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentialHelper.java (added)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentialHelper.java Wed Aug 15 18:54:51 2012
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.oozie.action.hadoop;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.security.token.Token;
+import org.apache.oozie.util.XLog;
+
+/**
+ * Helper class to handle HCat credentials.
+ * Internally performs the heavy lifting of fetching a delegation token from the Hive Metastore,
+ * abstracted from the user, and adds the token to the launcher job configuration.
+ */
+public class HCatCredentialHelper {
+
+    private static final String USER_NAME = "user.name";
+    // Some Hive Metastore properties
+    private static final String HIVE_METASTORE_SASL_ENABLED = "hive.metastore.sasl.enabled";
+    private static final String HIVE_METASTORE_KERBEROS_PRINCIPAL = "hive.metastore.kerberos.principal";
+    private static final String HIVE_METASTORE_LOCAL = "hive.metastore.local";
+
+    /**
+     * Sets the HCat delegation token in the launcher job configuration
+     * @param launcherJobConf - launcher job configuration
+     * @param principal - principal for the HCat server
+     * @param server - server URI for the HCat server
+     * @throws Exception
+     */
+    public void set(JobConf launcherJobConf, String principal, String server) throws Exception {
+        try {
+            HiveMetaStoreClient client = getHCatClient(principal, server);
+            XLog.getLog(getClass()).debug(
+                    "HCatCredentialHelper: set: User name for which token will be asked from HCat: "
+                            + launcherJobConf.get(USER_NAME));
+            String tokenStrForm = client.getDelegationToken(launcherJobConf.get(USER_NAME));
+            Token<DelegationTokenIdentifier> hcatToken = new Token<DelegationTokenIdentifier>();
+            hcatToken.decodeFromUrlString(tokenStrForm);
+            launcherJobConf.getCredentials().addToken(new Text("HCat Token"), hcatToken);
+            XLog.getLog(getClass()).debug("Added the HCat token in job conf");
+        }
+        catch (Exception ex) {
+            XLog.getLog(getClass()).debug("set Exception" + ex.getMessage());
+            throw ex;
+        }
+    }
+
+    /**
+     * Gets a Hive metastore client configured for the given principal and server.
+     * @param principal - principal for the HCat server
+     * @param server - server URI for the HCat server
+     * @return HiveMetaStoreClient
+     * @throws MetaException
+     */
+    public HiveMetaStoreClient getHCatClient(String principal, String server) throws MetaException {
+        HiveConf hiveConf = new HiveConf();
+        XLog.getLog(getClass()).debug("getHCatClient: Principal: " + principal + " Server: " + server);
+        // Point the client at the remote, Kerberos-secured metastore thrift URI
+        hiveConf.set(HIVE_METASTORE_SASL_ENABLED, "true");
+        hiveConf.set(HIVE_METASTORE_KERBEROS_PRINCIPAL, principal);
+        hiveConf.set(HIVE_METASTORE_LOCAL, "false");
+        hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, server);
+        return new HiveMetaStoreClient(hiveConf);
+    }
+}

Added: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentials.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentials.java?rev=1373568&view=auto
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentials.java (added)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HCatCredentials.java Wed Aug 15 18:54:51 2012
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.oozie.action.hadoop;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.oozie.ErrorCode;
+import org.apache.oozie.action.ActionExecutor.Context;
+import org.apache.oozie.action.hadoop.CredentialException;
+import org.apache.oozie.action.hadoop.Credentials;
+import org.apache.oozie.action.hadoop.CredentialsProperties;
+import org.apache.oozie.util.XLog;
+
+/**
+ * Credentials implementation that stores HCat-specific properties, such as the metastore principal and URI, in the jobConf.
+ * The user specifies these credential properties along with the action configuration.
+ * The jobConf is then used to pass the credentials to the tasks while running.
+ * The Oozie server should be configured to use this Credentials class by including it in the property 'oozie.credentials.credentialclasses'.
+ * Users can also extend the parent class to implement their own class
+ * for handling custom token-based credentials and add it to the above server property.
+ */
+public class HCatCredentials extends Credentials {
+
+    private static final String HCAT_METASTORE_PRINCIPAL = "hcat.metastore.principal";
+    private static final String HCAT_METASTORE_URI = "hcat.metastore.uri";
+
+    /* (non-Javadoc)
+     * @see org.apache.oozie.action.hadoop.Credentials#addtoJobConf(org.apache.hadoop.mapred.JobConf, org.apache.oozie.action.hadoop.CredentialsProperties, org.apache.oozie.action.ActionExecutor.Context)
+     */
+    @Override
+    public void addtoJobConf(JobConf jobconf, CredentialsProperties props, Context context) throws Exception {
+        try {
+            String principal = props.getProperties().get(HCAT_METASTORE_PRINCIPAL);
+            if (principal == null || principal.isEmpty()) {
+                throw new CredentialException(ErrorCode.E0510,
+                        HCAT_METASTORE_PRINCIPAL + " is required to get hcat credential");
+            }
+            String server = props.getProperties().get(HCAT_METASTORE_URI);
+            if (server == null || server.isEmpty()) {
+                throw new CredentialException(ErrorCode.E0510,
+                        HCAT_METASTORE_URI + " is required to get hcat credential");
+            }
+            HCatCredentialHelper hcch = new HCatCredentialHelper();
+            hcch.set(jobconf, principal, server);
+        }
+        catch (Exception e) {
+            XLog.getLog(getClass()).warn("Exception in addtoJobConf", e);
+            throw e;
+        }
+    }
+}

Added: incubator/oozie/trunk/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki?rev=1373568&view=auto
==============================================================================
--- incubator/oozie/trunk/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki (added)
+++ incubator/oozie/trunk/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki Wed Aug 15 18:54:51 2012
@@ -0,0 +1,124 @@
+<noautolink>
+
+[[index][::Go back to Oozie Documentation Index::]]
+
+---+!! Unified Credentials Module for Oozie
+
+%TOC%
+
+---++ Background
+
+Oozie is a workflow scheduling solution for pure Grid processing that needs to support the different job types existing in a Grid environment (M/R, Pig, Streaming, HDFS, etc.). This scheduling system is data aware, extensible, scalable and light-weight. Because Oozie is envisioned as the gateway to the grid for all batch processing needs, it has to be aware of all the other data processing systems that are in use now or will be used in the future.
+
+With Secure Hadoop used for data processing, every component built on top of Hadoop may use either the same security model as Hadoop or a different one, and each has its own way of authenticating users. Since all jobs for Hadoop, and for these other systems, go through Oozie, Oozie should provide a single interface with support for different implementations of these credentials modules. Using them, Oozie can authenticate users with all of those systems and run jobs seamlessly.
+
+For example, suppose a user has a system, call it ABC, that they want to use for running their jobs. It may follow the same delegation-token policy as Hadoop, or it may issue certificates for running jobs. Either way, the user should have a way to plug their system's credentials policy into Oozie in order to run those jobs.
+
+This module lets users provide credentials for any other system they may want to use when running jobs through Oozie, as long as they follow the same interface and provide an implementation for that system.
+
+---++ Options
+
+There are a couple of options for the implementation:
+
+   * Introduce separate actions ahead of all workflow applications which need specific authentication.
+   * Oozie will get credentials for user based on configuration in each action.
+   
+The following sections discuss their pros and cons and explain why the second option was chosen.
+
+---++ Option 1 : Separate Actions for Credentials
+
+In this option, Oozie would introduce multiple authentication actions, and users would place those actions ahead of the rest of their workflows to first obtain all the necessary credentials and pass them to all subsequent actions in the workflow. For example, if a user wants to run M/R and Pig actions against the ABC system, they would first add an ABC action ahead of the M/R and Pig actions; the Oozie server would then run the ABC action on the gateway (the Oozie server) and provide the necessary credentials to the following actions.
+
+---+++ Shortcomings
+
+This is a reasonable approach, but it has a couple of shortcomings:
+
+   * There would only be one delegation token for all the actions in the workflow. If a workflow has long-running actions, that token may expire, causing all subsequent actions to fail with authentication errors. One workaround is to increase the token timeout, a static value configured at the workflow level (assuming the underlying system exposes such an interface; if not, this cannot be done), but that adds more load on the underlying authentication servers in the case of short-running actions.
+   * There is the additional overhead of running one extra action per workflow.
+   
+---++ Option 2 : Getting Credentials in each action
+
+The solution to the problem described above is to make each action responsible for its own needs, in this case obtaining credential tokens for the different systems. This is already how the name node and job tracker are handled: every action gets its own HDFS token.
+
+In this approach, the user provides configuration in each workflow for all the needed/available credentials modules, and also specifies for each action which credentials it needs. Before running, every action calls the appropriate credential modules to get the tokens and passes them in the job conf for the tasks (see the workflow XML under User Interface Changes below).
+
+---+++ Shortcomings
+
+The shortcoming of this approach is that every action has to authenticate itself, and for now there is no way to avoid that because of the token expiration problem. A single workflow may therefore authenticate many times with the same service, which puts load on that service. If this turns out to be a problem, a de-authentication step could be added after the action finishes in the future.
+
+---+++ Assumptions
+
+This approach assumes that delegation tokens are passed in the job conf; without the jobconf it will not work. However, the jobconf is already used for passing the Namenode and Jobtracker tokens, so removing it would require rearchitecting that design as well. For now it is safe to assume the job conf is available.
+
+---++ User Interface Changes
+
+Users have to add the following configuration to their workflow.xml. The workflow XML below is provided for reference.
+
+<verbatim>
+   <workflow-app xmlns='uri:oozie:workflow:0.1' name='pig-wf'>
+      ...
+      <credentials>
+        <credential name='howlauth' type='hcat'>
+          <property>
+            <name>hcat.metastore.uri</name>
+            <value>HCAT_URI</value>
+          </property>
+          <property>
+            <name>hcat.metastore.principal</name>
+            <value>HCAT_PRINCIPAL</value>
+          </property>
+        </credential>
+      </credentials>
+      ...
+      <action name='pig' cred='howlauth'>
+        <pig>
+          <job-tracker>JT</job-tracker>
+          <name-node>NN</name-node>
+          <configuration>
+             <property>
+                <name>TESTING</name>
+                <value>${start}</value>
+             </property>
+          </configuration>
+        </pig>
+      </action>
+      ...
+   </workflow-app>
+</verbatim>
+
+---++ Changes in oozie-default.xml
+
+If users want to plug in a new authentication module for their needs, they have to register it in oozie-default.xml under the property oozie.credentials.credentialclasses, e.g. ABC=org.apache.oozie.action.hadoop.InsertTestToken.
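+
+As a minimal sketch, assuming the value is a comma-separated list of type=class pairs and reusing the ABC/InsertTestToken example from this page, the property entry could look like:
+
+<verbatim>
+   <!-- Illustrative sketch: the ABC entry refers to the sample InsertTestToken class shown below -->
+   <property>
+      <name>oozie.credentials.credentialclasses</name>
+      <value>hcat=org.apache.oozie.action.hadoop.HCatCredentials,ABC=org.apache.oozie.action.hadoop.InsertTestToken</value>
+   </property>
+</verbatim>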
+
+---++ Sample Insert Token class implementation
+
+The following sample class shows how users can write their own token class.
+
+<verbatim>
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.security.token.Token;
+import org.apache.oozie.action.ActionExecutor.Context;
+import org.apache.oozie.action.hadoop.Credentials;
+import org.apache.oozie.action.hadoop.CredentialsProperties;
+import org.apache.oozie.util.XLog;
+
+public class InsertTestToken extends Credentials {
+
+    public InsertTestToken() {
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.oozie.action.hadoop.Credentials#addtoJobConf(org.apache.hadoop.mapred.JobConf,
+     *      org.apache.oozie.action.hadoop.CredentialsProperties, org.apache.oozie.action.ActionExecutor.Context)
+     */
+    @Override
+    public void addtoJobConf(JobConf jobconf, CredentialsProperties props, Context context) throws Exception {
+        try {
+            // Create the custom token and add it to the job conf so it is passed on to the tasks
+            Token<DelegationTokenIdentifier> abctoken = new Token<DelegationTokenIdentifier>();
+            jobconf.getCredentials().addToken(new Text("ABC Token"), abctoken);
+            XLog.getLog(getClass()).debug("Added the ABC token in job conf");
+        }
+        catch (Exception e) {
+            XLog.getLog(getClass()).warn("Exception in addtoJobConf", e);
+            throw e;
+        }
+    }
+}
+</verbatim>
+
+[[index][::Go back to Oozie Documentation Index::]]
+
+</noautolink>
\ No newline at end of file

Modified: incubator/oozie/trunk/docs/src/site/twiki/index.twiki
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/docs/src/site/twiki/index.twiki?rev=1373568&r1=1373567&r2=1373568&view=diff
==============================================================================
--- incubator/oozie/trunk/docs/src/site/twiki/index.twiki (original)
+++ incubator/oozie/trunk/docs/src/site/twiki/index.twiki Wed Aug 15 18:54:51 2012
@@ -48,6 +48,7 @@ Enough reading already? Follow the steps
    * [[./client/apidocs/index.html][Oozie Client Javadocs]]
    * [[./core/apidocs/index.html][Oozie Core Javadocs]]
    * [[WebServicesAPI][Oozie Web Services API]]
+   * [[DG_UnifiedCredentialsModule][Unified Credentials Module]]
 
 ---+++ Action Extensions
 

Modified: incubator/oozie/trunk/release-log.txt
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1373568&r1=1373567&r2=1373568&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Wed Aug 15 18:54:51 2012
@@ -1,5 +1,6 @@
 -- Oozie 3.3.0 release (trunk - unreleased)
 
+OOZIE-889 Adding HCat credentials class for job conf (mona via virag)
 OOZIE-940 Junk messages appear in tomcat log (egashira via virag)
 OOZIE-955 TestCoordELFunctions and TestELConstantFunctions failing (bcry via tucu)
 OOZIE-954 Global section xsd should allow job-xml elements and update documentation (rkanter via tucu)