Posted to user@hadoop.apache.org by "Erravelli, Venkat" <ve...@baml.com> on 2012/11/28 16:53:37 UTC

submitting a mapreduce job to remote cluster

Hello :

I see the below exception when I submit a MapReduce job from a standalone Java application to a remote Hadoop cluster. The cluster authentication mechanism is Kerberos.

Below is the code. I am using user impersonation since I need to submit the job as a Hadoop cluster user (userx) from my machine, on which I am logged in as user99. So:

userx -- user that is set up on the hadoop cluster.
user99 -- user on whose machine the standalone Java application code is executing.

                    System.setProperty("HADOOP_USER_NAME", "userx");

            final Configuration conf = new Configuration();

            conf.set("hadoop.security.auth_to_local",
                        "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//<mailto:.*@\\Q\\E$)s/@\\Q\\E$//>"
                                    + "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//<mailto:.*@\\Q\\E$)s/@\\Q\\E$//>" + "DEFAULT");

            conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");

            conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");

            UserGroupInformation.setConfiguration(conf);

            System.out.println("here ::::: "+ UserGroupInformation.getCurrentUser());

UserGroupInformation ugi = UserGroupInformation.createProxyUser("user99", UserGroupInformation.getCurrentUser());
            AuthenticationMethod am = AuthenticationMethod.KERBEROS;
            ugi.setAuthenticationMethod(am);


            final Path inPath = new Path("/user/userx/test.txt");

            DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
            StringBuilder sb = new StringBuilder();
            sb.append("wordcount_result_").append(df.format(new Date()));

            // out
            final Path outPath = new Path(sb.toString());

            ugi.doAs(new PrivilegedExceptionAction<UserGroupInformation>() {   <<<<---------throws exception here!!!

                  public UserGroupInformation run() throws Exception {
                        // Submit a job
                        // create a new job based on the configuration
                        Job job = new Job(conf, "word count remote");

                        job.setJarByClass(WordCountJob.class);
                        job.setMapperClass(TokenizerMapper.class);
                        job.setCombinerClass(IntSumReducer.class);
                        job.setReducerClass(IntSumReducer.class);
                        job.setOutputKeyClass(Text.class);
                        job.setOutputValueClass(IntWritable.class);
                        FileInputFormat.addInputPath(job, inPath);
                        FileOutputFormat.setOutputPath(job, outPath);

                        // this waits until the job completes
                        job.waitForCompletion(true);

                        if (job.isSuccessful()) {
                              System.out.println("Job completed successfully");
                        } else {
                              System.out.println("Job Failed");
                        }
                        return UserGroupInformation.getCurrentUser();

                  }
            });

When the above code is executed, I get the below exception on the line mentioned in the code above:
***************
12/11/28 09:43:51 ERROR security.UserGroupInformation: PriviledgedActionException as: user99 (auth:KERBEROS) via userx (auth:SIMPLE) cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Authorization (hadoop.security.authorization) is enabled but authentication (hadoop.security.authentication) is configured as simple. Please configure another method like kerberos or digest.
Exception in thread "Main Thread" org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Authorization (hadoop.security.authorization) is enabled but authentication (hadoop.security.authentication) is configured as simple. Please configure another method like kerberos or digest.
***************
Can someone tell me or point me in the right direction on what is going on here, and how do I get past this exception? Any help will be greatly appreciated. Thanks!

Below are the hadoop cluster configuration files:

***************
Core-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://xxxxx.yyyy.com:9920</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>65536</value>
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value></value>
  </property>
  <property>
    <name>hadoop.security.authentication</name>
    <value>kerberos</value>
  </property>
  <property>
    <name>hadoop.security.auth_to_local</name>
    <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
DEFAULT</value>
  </property>
</configuration>


Hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
<configuration>
  <property>
    <name>dfs.https.address</name>
    <value>xxxxx.yyyy.com:50470</value>
  </property>
  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>xxxxx.yyyy.com:50070</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.client.use.datanode.hostname</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.block.access.token.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.kerberos.principal</name>
    <value>hdfs/_HOST@RND.HDFS.COM</value>
  </property>
  <property>
    <name>dfs.namenode.kerberos.https.principal</name>
    <value>host/_HOST@RND.HDFS.COM</value>
  </property>
  <property>
    <name>dfs.namenode.kerberos.internal.spnego.principal</name>
    <value>HTTP/_HOST@RND.HDFS.COM</value>
  </property>
</configuration>


Mapred-site.xml


<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>abcde.yyyy.com:9921</value>
  </property>
  <property>
    <name>mapred.output.compress</name>
    <value>false</value>
  </property>
  <property>
    <name>mapred.output.compression.type</name>
    <value>BLOCK</value>
  </property>
  <property>
    <name>mapred.output.compression.codec</name>
    <value>org.apache.hadoop.io.compress.DefaultCodec</value>
  </property>
  <property>
    <name>mapred.map.output.compression.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>mapred.compress.map.output</name>
    <value>true</value>
  </property>
  <property>
    <name>io.sort.factor</name>
    <value>64</value>
  </property>
  <property>
    <name>io.sort.record.percent</name>
    <value>0.05</value>
  </property>
  <property>
    <name>io.sort.spill.percent</name>
    <value>0.8</value>
  </property>
  <property>
    <name>mapred.reduce.parallel.copies</name>
    <value>10</value>
  </property>
  <property>
    <name>mapred.submit.replication</name>
    <value>10</value>
  </property>
  <property>
    <name>mapred.reduce.tasks</name>
    <value>72</value>
  </property>
  <property>
    <name>io.sort.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value> -Xmx1073741824</value>
  </property>
  <property>
    <name>mapred.job.reuse.jvm.num.tasks</name>
    <value>1</value>
  </property>
  <property>
    <name>mapred.map.tasks.speculative.execution</name>
    <value>false</value>
  </property>
  <property>
    <name>mapred.reduce.tasks.speculative.execution</name>
    <value>false</value>
  </property>
  <property>
    <name>mapred.reduce.slowstart.completed.maps</name>
    <value>1.0</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.kerberos.principal</name>
    <value>mapred/_HOST@RND.HDFS.COM</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.kerberos.https.principal</name>
    <value>host/_HOST@RND.HDFS.COM</value>
  </property>
</configuration>


***************


RE: submitting a mapreduce job to remote cluster

Posted by "Erravelli, Venkat" <ve...@baml.com>.
I have tested this authentication piece separately to make sure Kerberos authentication is working; it works fine. I have the krb5.ini, .java.login.config and java.security files on the Windows machine.

How do I integrate this authentication functionality to submit the MapReduce job to the remote Hadoop cluster? What am I missing? Thank you.

        public static void main(String[] args) throws Exception {

                // System.out.println(System.getProperty("java.home"));

                if (args.length > 0) {
                        name = args[0];
                } else {
                        name = "test";
                }

                // Create action to perform
                PrivilegedExceptionAction action = new MyAction();

                loginAndAction(name, action);
        }

        static void loginAndAction(String name, PrivilegedExceptionAction action)
                        throws LoginException, PrivilegedActionException {

                // Create a callback handler
                CallbackHandler callbackHandler = new TextCallbackHandler();

                LoginContext context = null;

                try {
                        // Create a LoginContext with a callback handler
                        context = new LoginContext(name, callbackHandler);

                        // Perform authentication
                        context.login();
                } catch (LoginException e) {
                        System.err.println("Login failed");
                        e.printStackTrace();
                        System.exit(-1);
                }

                // Perform action as authenticated user
                Subject subject = context.getSubject();
                if (verbose) {
                        System.out.println(subject.toString());
                } else {
                        System.out.println("Authenticated principal: "
                                        + subject.getPrincipals());
                }

                //Subject.doAs(subject, action);

                context.logout();
        }


Output
--------------------------------------------------
Kerberos password for user1: *********
Authenticated principal: [user1@xyz.HDFS.COM]
----------------------------------------------------
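
For reference, here is a minimal sketch (not the original code) of how a Kerberos login could be wired straight into the job submission through Hadoop's UserGroupInformation instead of a separate LoginContext. It assumes a keytab exists for userx and that the client Configuration explicitly sets hadoop.security.authentication to kerberos; the principal and keytab path below are placeholders.

        import java.security.PrivilegedExceptionAction;

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.mapreduce.Job;
        import org.apache.hadoop.security.UserGroupInformation;

        public class KerberosSubmitSketch {

                public static void main(String[] args) throws Exception {
                        final Configuration conf = new Configuration();
                        conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
                        conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
                        // Match the cluster's core-site.xml so the client does not fall back to simple auth.
                        conf.set("hadoop.security.authentication", "kerberos");

                        UserGroupInformation.setConfiguration(conf);
                        // Placeholder principal/keytab: log in to Hadoop directly from a keytab.
                        UserGroupInformation.loginUserFromKeytab("userx@RND.HDFS.COM", "C:/keytabs/userx.keytab");
                        UserGroupInformation ugi = UserGroupInformation.getLoginUser();

                        ugi.doAs(new PrivilegedExceptionAction<Void>() {
                                public Void run() throws Exception {
                                        Job job = new Job(conf, "word count remote");
                                        // ... same mapper/combiner/reducer/input/output setup as in the original post ...
                                        job.waitForCompletion(true);
                                        return null;
                                }
                        });
                }
        }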

-----Original Message-----
From: Harsh J [mailto:harsh@cloudera.com]
Sent: Wednesday, November 28, 2012 12:23 PM
To: <us...@hadoop.apache.org>
Subject: Re: submitting a mapreduce job to remote cluster

Hi,

This appears to be more of an environment or JRE config issue. Your Windows machine needs the Kerberos configuration files on it for the Java security APIs to be able to locate which KDC to talk to for logging in. You can also manually specify the path to such a configuration - read http://docs.oracle.com/javase/1.4.2/docs/guide/security/jgss/tutorials/KerberosReq.html
for details on how a JRE locates the Kerberos config file on different platforms, and how you may override it.
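
As an illustration, assuming the config lives at C:\kerberos\krb5.ini (a placeholder path, as is the KDC host below), the override can be passed on the JVM command line or set programmatically before any Kerberos class is touched:

    java -Djava.security.krb5.conf=C:\kerberos\krb5.ini com.ard.WordCountJob

or, in code:

    // Point the JRE at an explicit krb5 config file...
    System.setProperty("java.security.krb5.conf", "C:/kerberos/krb5.ini");
    // ...or supply the realm and KDC directly instead.
    System.setProperty("java.security.krb5.realm", "RND.HDFS.COM");
    System.setProperty("java.security.krb5.kdc", "kdc.rnd.hdfs.com");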

On Wed, Nov 28, 2012 at 10:43 PM, Erravelli, Venkat <ve...@baml.com> wrote:
> Tried the below :
>
> conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.
>
>
> UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it
> fails on this line with the below exception
>
>
> Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.lang.reflect.InvocationTargetException
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at sun.security.krb5.Config.<init>(Config.java:147)
>         at sun.security.krb5.Config.getInstance(Config.java:79)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at java.io.FileInputStream.open(Native Method)
>         at java.io.FileInputStream.<init>(FileInputStream.java:106)
>         at java.io.FileInputStream.<init>(FileInputStream.java:66)
>         at sun.security.krb5.Config$1.run(Config.java:539)
>         at sun.security.krb5.Config.loadConfigFile(Config.java:535)
>         at sun.security.krb5.Config.<init>(Config.java:144)
>         ... 11 more
>
> -----Original Message-----
> From: Harsh J [mailto:harsh@cloudera.com]
> Sent: Wednesday, November 28, 2012 11:35 AM
> To: <us...@hadoop.apache.org>
> Subject: Re: submitting a mapreduce job to remote cluster
>
> Are you positive that your cluster/client configuration files'
> directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.
>
> Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.
>
> On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
>> Hello :
>>
>>
>>
>> I see the below exception when I submit a MapReduce Job from
>> standalone java application to a remote Hadoop cluster. Cluster
>> authentication mechanism is Kerberos.
>>
>>
>>
>> Below is the code. I am using user impersonation since I need to
>> submit the job as a hadoop cluster user (userx) from my machine, on
>> which I am logged is as user99. So:
>>
>>
>>
>> userx -- user that is setup on the hadoop cluster.
>>
>> user99 -- user on whoes machine the standalone java application code
>> is executing.
>>
>>
>>
>>                     System.setProperty("HADOOP_USER_NAME", "userx");
>>
>>
>>
>>             final Configuration conf = new Configuration();
>>
>>
>>
>>             conf.set("hadoop.security.auth_to_local",
>>
>>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>>
>>                                     +
>> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>>
>>
>>
>>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>>
>>
>>
>>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>>
>>
>>
>>             UserGroupInformation.setConfiguration(conf);
>>
>>
>>
>>             System.out.println("here ::::: "+
>> UserGroupInformation.getCurrentUser());
>>
>>
>>
>> UserGroupInformation ugi =
>> UserGroupInformation.createProxyUser("user99",
>> UserGroupInformation.getCurrentUser());
>>
>>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>>
>>             ugi.setAuthenticationMethod(am);
>>
>>
>>
>>
>>
>>             final Path inPath = new Path("/user/userx/test.txt");
>>
>>
>>
>>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>>
>>             StringBuilder sb = new StringBuilder();
>>
>>             sb.append("wordcount_result_").append(df.format(new
>> Date()));
>>
>>
>>
>>             // out
>>
>>             final Path outPath = new Path(sb.toString());
>>
>>
>>
>>             ugi.doAs(new
>> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>>
>>
>>
>>                   public UserGroupInformation run() throws Exception
>> {
>>
>>                         // Submit a job
>>
>>                         // create a new job based on the
>> configuration
>>
>>                         Job job = new Job(conf, "word count remote");
>>
>>
>>
>>                         job.setJarByClass(WordCountJob.class);
>>
>>                         job.setMapperClass(TokenizerMapper.class);
>>
>>                         job.setCombinerClass(IntSumReducer.class);
>>
>>                         job.setReducerClass(IntSumReducer.class);
>>
>>                         job.setOutputKeyClass(Text.class);
>>
>>                         job.setOutputValueClass(IntWritable.class);
>>
>>                         FileInputFormat.addInputPath(job, inPath);
>>
>>                         FileOutputFormat.setOutputPath(job, outPath);
>>
>>
>>
>>                         // this waits until the job completes
>>
>>                         job.waitForCompletion(true);
>>
>>
>>
>>                         if (job.isSuccessful()) {
>>
>>                               System.out.println("Job completed
>> successfully");
>>
>>                         } else {
>>
>>                               System.out.println("Job Failed");
>>
>>                         }
>>
>>                         return UserGroupInformation.getCurrentUser();
>>
>>
>>
>>                   }
>>
>>             });
>>
>>
>>
>> When the above code is executed, I get the below exception on the
>> line mentioned in the code above:
>>
>> ***************
>>
>> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
>> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
>> (auth:SIMPLE)
>> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> Exception in thread "Main Thread"
>> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> ***************
>>
>> Can someone tell me/point me in the right direction on what is going
>> on here, and how do i get over this exception? Any help will be
>> greatly appreciated. thanks!
>>
>>
>>
>> Below are the hadoop cluster configuration files:
>>
>>
>>
>> ***************
>>
>> Core-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>fs.defaultFS</name>
>>
>>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.file.buffer.size</name>
>>
>>     <value>65536</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.compression.codecs</name>
>>
>>     <value></value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.authentication</name>
>>
>>     <value>kerberos</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.auth_to_local</name>
>>
>>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> DEFAULT</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Hdfs-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>dfs.https.address</name>
>>
>>     <value>xxxxx.yyyy.com:50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.https.port</name>
>>
>>     <value>50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.http-address</name>
>>
>>     <value>xxxxx.yyyy.com:50070</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.replication</name>
>>
>>     <value>3</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.blocksize</name>
>>
>>     <value>134217728</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.client.use.datanode.hostname</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.block.access.token.enable</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.principal</name>
>>
>>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>>
>>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Mapred-site.xml
>>
>>
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>mapred.job.tracker</name>
>>
>>     <value>abcde.yyyy.com:9921</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compress</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.type</name>
>>
>>     <value>BLOCK</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.compress.map.output</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.factor</name>
>>
>>     <value>64</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.record.percent</name>
>>
>>     <value>0.05</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.spill.percent</name>
>>
>>     <value>0.8</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.parallel.copies</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.submit.replication</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks</name>
>>
>>     <value>72</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.mb</name>
>>
>>     <value>256</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.child.java.opts</name>
>>
>>     <value> -Xmx1073741824</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.job.reuse.jvm.num.tasks</name>
>>
>>     <value>1</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.slowstart.completed.maps</name>
>>
>>     <value>1.0</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.principal</name>
>>
>>     <value>mapred/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> ***************
>>
>>
>>
>
>
>
> --
> Harsh J
>



--
Harsh J


>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.principal</name>
>>
>>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>>
>>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Mapred-site.xml
>>
>>
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>mapred.job.tracker</name>
>>
>>     <value>abcde.yyyy.com:9921</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compress</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.type</name>
>>
>>     <value>BLOCK</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.compress.map.output</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.factor</name>
>>
>>     <value>64</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.record.percent</name>
>>
>>     <value>0.05</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.spill.percent</name>
>>
>>     <value>0.8</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.parallel.copies</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.submit.replication</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks</name>
>>
>>     <value>72</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.mb</name>
>>
>>     <value>256</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.child.java.opts</name>
>>
>>     <value> -Xmx1073741824</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.job.reuse.jvm.num.tasks</name>
>>
>>     <value>1</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.slowstart.completed.maps</name>
>>
>>     <value>1.0</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.principal</name>
>>
>>     <value>mapred/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> ***************
>>
>>
>>
>> ________________________________
>> This message, and any attachments, is for the intended recipient(s)
>> only, may contain information that is privileged, confidential and/or
>> proprietary and subject to important terms and conditions available
>> at http://www.bankofamerica.com/emaildisclaimer. If you are not the
>> intended recipient, please delete this message.
>
>
>
> --
> Harsh J
>
> ----------------------------------------------------------------------
> This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.



--
Harsh J

----------------------------------------------------------------------
This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.

RE: submitting a mapreduce job to remote cluster

Posted by "Erravelli, Venkat" <ve...@baml.com>.
I have tested the authentication piece separately to make sure Kerberos authentication works, and it does. I have the krb5.ini, .java.login.config and java.security files on the Windows machine.

How do I integrate this authentication step with submitting the MapReduce job to the remote Hadoop cluster? What am I missing?  Thank you.

        public static void main(String[] args) throws Exception {

                // System.out.println(System.getProperty("java.home"));

                if (args.length > 0) {
                        name = args[0];
                } else {
                        name = "test";
                }

                // Create action to perform
                PrivilegedExceptionAction action = new MyAction();

                loginAndAction(name, action);
        }

        static void loginAndAction(String name, PrivilegedExceptionAction action)
                        throws LoginException, PrivilegedActionException {

                // Create a callback handler
                CallbackHandler callbackHandler = new TextCallbackHandler();

                LoginContext context = null;

                try {
                        // Create a LoginContext with a callback handler
                        context = new LoginContext(name, callbackHandler);

                        // Perform authentication
                        context.login();
                } catch (LoginException e) {
                        System.err.println("Login failed");
                        e.printStackTrace();
                        System.exit(-1);
                }

                // Perform action as authenticated user
                Subject subject = context.getSubject();
                if (verbose) {
                        System.out.println(subject.toString());
                } else {
                        System.out.println("Authenticated principal: "
                                        + subject.getPrincipals());
                }

                //Subject.doAs(subject, action);

                context.logout();
        }


Output
--------------------------------------------------
Kerberos password for user1: *********
Authenticated principal: [user1@xyz.HDFS.COM]
----------------------------------------------------
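
One way to wire this into the Hadoop client is to perform the login through UserGroupInformation itself and submit the job inside doAs(). The sketch below is only an illustration under assumptions: it uses a keytab-based login (loginUserFromKeytabAndReturnUGI) instead of the password callback above, and the principal name, keytab path, host names and ports are placeholders, not values confirmed in this thread.

import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.security.UserGroupInformation;

public class SecureSubmitSketch {

    public static void main(String[] args) throws Exception {

        final Configuration conf = new Configuration();
        // Cluster addresses (placeholders).
        conf.set("fs.defaultFS", "hdfs://namenode.example.com:9920");
        conf.set("mapred.job.tracker", "jobtracker.example.com:9921");
        // Match the cluster's core-site.xml so the client side also uses Kerberos.
        conf.set("hadoop.security.authentication", "kerberos");

        // Must run before any login attempt so UGI picks up the kerberos setting.
        UserGroupInformation.setConfiguration(conf);

        // Keytab-based login; principal and keytab path are placeholders.
        UserGroupInformation ugi = UserGroupInformation
                .loginUserFromKeytabAndReturnUGI("userx@RND.HDFS.COM",
                        "C:\\kerberos\\userx.keytab");

        // Submit the job as the authenticated user.
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            public Void run() throws Exception {
                Job job = new Job(conf, "word count remote");
                job.setJarByClass(SecureSubmitSketch.class);
                // set mapper/combiner/reducer classes here as in the original code
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                FileInputFormat.addInputPath(job, new Path("/user/userx/test.txt"));
                FileOutputFormat.setOutputPath(job, new Path("wordcount_result_sketch"));
                job.waitForCompletion(true);
                return null;
            }
        });
    }
}

If only a password login (as in the LoginContext code above) is available, the same doAs() submission still applies once a login has been obtained; the essential points are that hadoop.security.authentication is set to kerberos on the client and that UserGroupInformation.setConfiguration() runs before the login.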

-----Original Message-----
From: Harsh J [mailto:harsh@cloudera.com]
Sent: Wednesday, November 28, 2012 12:23 PM
To: <us...@hadoop.apache.org>
Subject: Re: submitting a mapreduce job to remote cluster

Hi,

This appears to be more of an environment or JRE config issue. Your Windows machine needs the kerberos configuration files on it for Java security APIs to be able to locate which KDC to talk to, for logging in. You can also manually specify the path to such a configuration - read http://docs.oracle.com/javase/1.4.2/docs/guide/security/jgss/tutorials/KerberosReq.html
for some behavior data on how a JRE would locate the kerberos config file on different platforms, and how you may override it.

On Wed, Nov 28, 2012 at 10:43 PM, Erravelli, Venkat <ve...@baml.com> wrote:
> Tried the below :
>
> conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.
>
>
> UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it
> fails on this line with the below exception
>
>
> Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.lang.reflect.InvocationTargetException
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at sun.security.krb5.Config.<init>(Config.java:147)
>         at sun.security.krb5.Config.getInstance(Config.java:79)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at java.io.FileInputStream.open(Native Method)
>         at java.io.FileInputStream.<init>(FileInputStream.java:106)
>         at java.io.FileInputStream.<init>(FileInputStream.java:66)
>         at sun.security.krb5.Config$1.run(Config.java:539)
>         at sun.security.krb5.Config.loadConfigFile(Config.java:535)
>         at sun.security.krb5.Config.<init>(Config.java:144)
>         ... 11 more
>
> -----Original Message-----
> From: Harsh J [mailto:harsh@cloudera.com]
> Sent: Wednesday, November 28, 2012 11:35 AM
> To: <us...@hadoop.apache.org>
> Subject: Re: submitting a mapreduce job to remote cluster
>
> Are you positive that your cluster/client configuration files'
> directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.
>
> Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.
>
> On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
>> Hello :
>>
>>
>>
>> I see the below exception when I submit a MapReduce Job from
>> standalone java application to a remote Hadoop cluster. Cluster
>> authentication mechanism is Kerberos.
>>
>>
>>
>> Below is the code. I am using user impersonation since I need to
>> submit the job as a hadoop cluster user (userx) from my machine, on
>> which I am logged in as user99. So:
>>
>>
>>
>> userx -- user that is setup on the hadoop cluster.
>>
>> user99 -- user on whose machine the standalone java application code
>> is executing.
>>
>>
>>
>>                     System.setProperty("HADOOP_USER_NAME", "userx");
>>
>>
>>
>>             final Configuration conf = new Configuration();
>>
>>
>>
>>             conf.set("hadoop.security.auth_to_local",
>>
>>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>>
>>                                     +
>> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>>
>>
>>
>>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>>
>>
>>
>>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>>
>>
>>
>>             UserGroupInformation.setConfiguration(conf);
>>
>>
>>
>>             System.out.println("here ::::: "+
>> UserGroupInformation.getCurrentUser());
>>
>>
>>
>> UserGroupInformation ugi =
>> UserGroupInformation.createProxyUser("user99",
>> UserGroupInformation.getCurrentUser());
>>
>>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>>
>>             ugi.setAuthenticationMethod(am);
>>
>>
>>
>>
>>
>>             final Path inPath = new Path("/user/userx/test.txt");
>>
>>
>>
>>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>>
>>             StringBuilder sb = new StringBuilder();
>>
>>             sb.append("wordcount_result_").append(df.format(new
>> Date()));
>>
>>
>>
>>             // out
>>
>>             final Path outPath = new Path(sb.toString());
>>
>>
>>
>>             ugi.doAs(new
>> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>>
>>
>>
>>                   public UserGroupInformation run() throws Exception
>> {
>>
>>                         // Submit a job
>>
>>                         // create a new job based on the
>> configuration
>>
>>                         Job job = new Job(conf, "word count remote");
>>
>>
>>
>>                         job.setJarByClass(WordCountJob.class);
>>
>>                         job.setMapperClass(TokenizerMapper.class);
>>
>>                         job.setCombinerClass(IntSumReducer.class);
>>
>>                         job.setReducerClass(IntSumReducer.class);
>>
>>                         job.setOutputKeyClass(Text.class);
>>
>>                         job.setOutputValueClass(IntWritable.class);
>>
>>                         FileInputFormat.addInputPath(job, inPath);
>>
>>                         FileOutputFormat.setOutputPath(job, outPath);
>>
>>
>>
>>                         // this waits until the job completes
>>
>>                         job.waitForCompletion(true);
>>
>>
>>
>>                         if (job.isSuccessful()) {
>>
>>                               System.out.println("Job completed
>> successfully");
>>
>>                         } else {
>>
>>                               System.out.println("Job Failed");
>>
>>                         }
>>
>>                         return UserGroupInformation.getCurrentUser();
>>
>>
>>
>>                   }
>>
>>             });
>>
>>
>>
>> When the above code is executed, I get the below exception on the
>> line mentioned in the code above:
>>
>> ***************
>>
>> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
>> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
>> (auth:SIMPLE)
>> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> Exception in thread "Main Thread"
>> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> ***************
>>
>> Can someone tell me/point me in the right direction on what is going
>> on here, and how do i get over this exception? Any help will be
>> greatly appreciated. thanks!
>>
>>
>>
>> Below are the hadoop cluster configuration files:
>>
>>
>>
>> ***************
>>
>> Core-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>fs.defaultFS</name>
>>
>>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.file.buffer.size</name>
>>
>>     <value>65536</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.compression.codecs</name>
>>
>>     <value></value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.authentication</name>
>>
>>     <value>kerberos</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.auth_to_local</name>
>>
>>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> DEFAULT</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Hdfs-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>dfs.https.address</name>
>>
>>     <value>xxxxx.yyyy.com:50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.https.port</name>
>>
>>     <value>50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.http-address</name>
>>
>>     <value>xxxxx.yyyy.com:50070</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.replication</name>
>>
>>     <value>3</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.blocksize</name>
>>
>>     <value>134217728</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.client.use.datanode.hostname</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.block.access.token.enable</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.principal</name>
>>
>>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>>
>>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Mapred-site.xml
>>
>>
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>mapred.job.tracker</name>
>>
>>     <value>abcde.yyyy.com:9921</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compress</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.type</name>
>>
>>     <value>BLOCK</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.compress.map.output</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.factor</name>
>>
>>     <value>64</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.record.percent</name>
>>
>>     <value>0.05</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.spill.percent</name>
>>
>>     <value>0.8</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.parallel.copies</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.submit.replication</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks</name>
>>
>>     <value>72</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.mb</name>
>>
>>     <value>256</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.child.java.opts</name>
>>
>>     <value> -Xmx1073741824</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.job.reuse.jvm.num.tasks</name>
>>
>>     <value>1</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.slowstart.completed.maps</name>
>>
>>     <value>1.0</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.principal</name>
>>
>>     <value>mapred/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> ***************
>>
>>
>>
>> ________________________________
>> This message, and any attachments, is for the intended recipient(s)
>> only, may contain information that is privileged, confidential and/or
>> proprietary and subject to important terms and conditions available
>> at http://www.bankofamerica.com/emaildisclaimer. If you are not the
>> intended recipient, please delete this message.
>
>
>
> --
> Harsh J
>
> ----------------------------------------------------------------------
> This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.



--
Harsh J

----------------------------------------------------------------------
This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.

Re: submitting a mapreduce job to remote cluster

Posted by Harsh J <ha...@cloudera.com>.
Hi,

This appears to be more of an environment or JRE config issue. Your
Windows machine needs the kerberos configuration files on it for Java
security APIs to be able to locate which KDC to talk to, for logging
in. You can also manually specify the path to such a configuration -
read http://docs.oracle.com/javase/1.4.2/docs/guide/security/jgss/tutorials/KerberosReq.html
for some behavior data on how a JRE would locate the kerberos config
file on different platforms, and how you may override it.
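
For instance (a minimal sketch; the krb5.ini path, realm and KDC host below are placeholders), the JRE can be pointed at an explicit Kerberos configuration on the command line:

    java -Djava.security.krb5.conf=C:\kerberos\krb5.ini com.ard.WordCountJob

or from code, as long as the property is set before any Kerberos class is first loaded:

    // Point the JRE at a specific krb5 configuration file (placeholder path).
    System.setProperty("java.security.krb5.conf", "C:\\kerberos\\krb5.ini");

    // Or, instead of a file, name the realm and KDC directly (both must be set together).
    // System.setProperty("java.security.krb5.realm", "RND.HDFS.COM");
    // System.setProperty("java.security.krb5.kdc", "kdc.example.com");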

On Wed, Nov 28, 2012 at 10:43 PM, Erravelli, Venkat
<ve...@baml.com> wrote:
> Tried the below :
>
> conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.
>
>
> UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it fails on this line with the below exception
>
>
> Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.lang.reflect.InvocationTargetException
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at sun.security.krb5.Config.<init>(Config.java:147)
>         at sun.security.krb5.Config.getInstance(Config.java:79)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at java.io.FileInputStream.open(Native Method)
>         at java.io.FileInputStream.<init>(FileInputStream.java:106)
>         at java.io.FileInputStream.<init>(FileInputStream.java:66)
>         at sun.security.krb5.Config$1.run(Config.java:539)
>         at sun.security.krb5.Config.loadConfigFile(Config.java:535)
>         at sun.security.krb5.Config.<init>(Config.java:144)
>         ... 11 more
>
> -----Original Message-----
> From: Harsh J [mailto:harsh@cloudera.com]
> Sent: Wednesday, November 28, 2012 11:35 AM
> To: <us...@hadoop.apache.org>
> Subject: Re: submitting a mapreduce job to remote cluster
>
> Are you positive that your cluster/client configuration files'
> directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.
>
> Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.
>
> On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
>> Hello :
>>
>>
>>
>> I see the below exception when I submit a MapReduce Job from
>> standalone java application to a remote Hadoop cluster. Cluster
>> authentication mechanism is Kerberos.
>>
>>
>>
>> Below is the code. I am using user impersonation since I need to
>> submit the job as a hadoop cluster user (userx) from my machine, on
>> which I am logged in as user99. So:
>>
>>
>>
>> userx -- user that is setup on the hadoop cluster.
>>
>> user99 -- user on whose machine the standalone java application code
>> is executing.
>>
>>
>>
>>                     System.setProperty("HADOOP_USER_NAME", "userx");
>>
>>
>>
>>             final Configuration conf = new Configuration();
>>
>>
>>
>>             conf.set("hadoop.security.auth_to_local",
>>
>>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>>
>>                                     +
>> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>>
>>
>>
>>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>>
>>
>>
>>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>>
>>
>>
>>             UserGroupInformation.setConfiguration(conf);
>>
>>
>>
>>             System.out.println("here ::::: "+
>> UserGroupInformation.getCurrentUser());
>>
>>
>>
>> UserGroupInformation ugi =
>> UserGroupInformation.createProxyUser("user99",
>> UserGroupInformation.getCurrentUser());
>>
>>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>>
>>             ugi.setAuthenticationMethod(am);
>>
>>
>>
>>
>>
>>             final Path inPath = new Path("/user/userx/test.txt");
>>
>>
>>
>>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>>
>>             StringBuilder sb = new StringBuilder();
>>
>>             sb.append("wordcount_result_").append(df.format(new
>> Date()));
>>
>>
>>
>>             // out
>>
>>             final Path outPath = new Path(sb.toString());
>>
>>
>>
>>             ugi.doAs(new
>> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>>
>>
>>
>>                   public UserGroupInformation run() throws Exception {
>>
>>                         // Submit a job
>>
>>                         // create a new job based on the configuration
>>
>>                         Job job = new Job(conf, "word count remote");
>>
>>
>>
>>                         job.setJarByClass(WordCountJob.class);
>>
>>                         job.setMapperClass(TokenizerMapper.class);
>>
>>                         job.setCombinerClass(IntSumReducer.class);
>>
>>                         job.setReducerClass(IntSumReducer.class);
>>
>>                         job.setOutputKeyClass(Text.class);
>>
>>                         job.setOutputValueClass(IntWritable.class);
>>
>>                         FileInputFormat.addInputPath(job, inPath);
>>
>>                         FileOutputFormat.setOutputPath(job, outPath);
>>
>>
>>
>>                         // this waits until the job completes
>>
>>                         job.waitForCompletion(true);
>>
>>
>>
>>                         if (job.isSuccessful()) {
>>
>>                               System.out.println("Job completed
>> successfully");
>>
>>                         } else {
>>
>>                               System.out.println("Job Failed");
>>
>>                         }
>>
>>                         return UserGroupInformation.getCurrentUser();
>>
>>
>>
>>                   }
>>
>>             });
>>
>>
>>
>> When the above code is executed, I get the below exception on the line
>> mentioned in the code above:
>>
>> ***************
>>
>> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
>> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
>> (auth:SIMPLE)
>> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> Exception in thread "Main Thread"
>> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> ***************
>>
>> Can someone tell me/point me in the right direction on what is going
>> on here, and how do i get over this exception? Any help will be
>> greatly appreciated. thanks!
>>
>>
>>
>> Below are the hadoop cluster configuration files:
>>
>>
>>
>> ***************
>>
>> Core-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>fs.defaultFS</name>
>>
>>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.file.buffer.size</name>
>>
>>     <value>65536</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.compression.codecs</name>
>>
>>     <value></value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.authentication</name>
>>
>>     <value>kerberos</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.auth_to_local</name>
>>
>>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> DEFAULT</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Hdfs-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>dfs.https.address</name>
>>
>>     <value>xxxxx.yyyy.com:50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.https.port</name>
>>
>>     <value>50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.http-address</name>
>>
>>     <value>xxxxx.yyyy.com:50070</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.replication</name>
>>
>>     <value>3</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.blocksize</name>
>>
>>     <value>134217728</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.client.use.datanode.hostname</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.block.access.token.enable</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.principal</name>
>>
>>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>>
>>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Mapred-site.xml
>>
>>
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>mapred.job.tracker</name>
>>
>>     <value>abcde.yyyy.com:9921</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compress</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.type</name>
>>
>>     <value>BLOCK</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.compress.map.output</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.factor</name>
>>
>>     <value>64</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.record.percent</name>
>>
>>     <value>0.05</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.spill.percent</name>
>>
>>     <value>0.8</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.parallel.copies</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.submit.replication</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks</name>
>>
>>     <value>72</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.mb</name>
>>
>>     <value>256</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.child.java.opts</name>
>>
>>     <value> -Xmx1073741824</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.job.reuse.jvm.num.tasks</name>
>>
>>     <value>1</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.slowstart.completed.maps</name>
>>
>>     <value>1.0</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.principal</name>
>>
>>     <value>mapred/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> ***************
>>
>>
>>
>> ________________________________
>> This message, and any attachments, is for the intended recipient(s)
>> only, may contain information that is privileged, confidential and/or
>> proprietary and subject to important terms and conditions available at
>> http://www.bankofamerica.com/emaildisclaimer. If you are not the
>> intended recipient, please delete this message.
>
>
>
> --
> Harsh J
>
> ----------------------------------------------------------------------
> This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.



-- 
Harsh J


Re: submitting a mapreduce job to remote cluster

Posted by Harsh J <ha...@cloudera.com>.
Hi,

This appears to be more of an environment or JRE config issue. Your
Windows machine needs the kerberos configuration file on it so that the
Java security APIs can locate which KDC to talk to when logging in. You
can also manually specify the path to such a configuration - see
http://docs.oracle.com/javase/1.4.2/docs/guide/security/jgss/tutorials/KerberosReq.html
for details on how a JRE locates the kerberos config file on different
platforms, and how you may override it.
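
For example, to point the JRE at a specific kerberos config explicitly (the
file path and KDC host below are placeholders; only the RND.HDFS.COM realm
comes from the configs posted in this thread), the standard JRE system
properties can be set before any Hadoop security classes are initialized:

    // Point the JVM at an explicit kerberos config file instead of the
    // platform default (e.g. C:\WINNT\krb5.ini on older Windows).
    System.setProperty("java.security.krb5.conf", "C:/kerberos/krb5.ini");

    // Or skip the file and give the realm and KDC directly; both
    // properties must be set together for this to take effect.
    // System.setProperty("java.security.krb5.realm", "RND.HDFS.COM");
    // System.setProperty("java.security.krb5.kdc", "kdc.example.com");

    UserGroupInformation.setConfiguration(conf);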

On Wed, Nov 28, 2012 at 10:43 PM, Erravelli, Venkat
<ve...@baml.com> wrote:
> Tried the below :
>
> conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.
>
>
> UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it fails on this line with the below exception
>
>
> Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.lang.reflect.InvocationTargetException
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
>         at com.ard.WordCountJob.process2(WordCountJob.java:147)
>         at com.ard.WordCountJob.main(WordCountJob.java:198)
> Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at sun.security.krb5.Config.<init>(Config.java:147)
>         at sun.security.krb5.Config.getInstance(Config.java:79)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
>         at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
>         at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
>         at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
>         at com.ard.WordCountJob.process2(WordCountJob.java:142)
>         at com.ard.WordCountJob.main(WordCountJob.java:197)
> Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
>         at java.io.FileInputStream.open(Native Method)
>         at java.io.FileInputStream.<init>(FileInputStream.java:106)
>         at java.io.FileInputStream.<init>(FileInputStream.java:66)
>         at sun.security.krb5.Config$1.run(Config.java:539)
>         at sun.security.krb5.Config.loadConfigFile(Config.java:535)
>         at sun.security.krb5.Config.<init>(Config.java:144)
>         ... 11 more
>
> -----Original Message-----
> From: Harsh J [mailto:harsh@cloudera.com]
> Sent: Wednesday, November 28, 2012 11:35 AM
> To: <us...@hadoop.apache.org>
> Subject: Re: submitting a mapreduce job to remote cluster
>
> Are you positive that your cluster/client configuration files'
> directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.
>
> Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.
>
> On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
>> Hello :
>>
>>
>>
>> I see the below exception when I submit a MapReduce Job from
>> standalone java application to a remote Hadoop cluster. Cluster
>> authentication mechanism is Kerberos.
>>
>>
>>
>> Below is the code. I am using user impersonation since I need to
>> submit the job as a hadoop cluster user (userx) from my machine, on
>> which I am logged is as user99. So:
>>
>>
>>
>> userx -- user that is setup on the hadoop cluster.
>>
>> user99 -- user on whoes machine the standalone java application code
>> is executing.
>>
>>
>>
>>                     System.setProperty("HADOOP_USER_NAME", "userx");
>>
>>
>>
>>             final Configuration conf = new Configuration();
>>
>>
>>
>>             conf.set("hadoop.security.auth_to_local",
>>
>>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>>
>>                                     +
>> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>>
>>
>>
>>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>>
>>
>>
>>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>>
>>
>>
>>             UserGroupInformation.setConfiguration(conf);
>>
>>
>>
>>             System.out.println("here ::::: "+
>> UserGroupInformation.getCurrentUser());
>>
>>
>>
>> UserGroupInformation ugi =
>> UserGroupInformation.createProxyUser("user99",
>> UserGroupInformation.getCurrentUser());
>>
>>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>>
>>             ugi.setAuthenticationMethod(am);
>>
>>
>>
>>
>>
>>             final Path inPath = new Path("/user/userx/test.txt");
>>
>>
>>
>>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>>
>>             StringBuilder sb = new StringBuilder();
>>
>>             sb.append("wordcount_result_").append(df.format(new
>> Date()));
>>
>>
>>
>>             // out
>>
>>             final Path outPath = new Path(sb.toString());
>>
>>
>>
>>             ugi.doAs(new
>> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>>
>>
>>
>>                   public UserGroupInformation run() throws Exception {
>>
>>                         // Submit a job
>>
>>                         // create a new job based on the configuration
>>
>>                         Job job = new Job(conf, "word count remote");
>>
>>
>>
>>                         job.setJarByClass(WordCountJob.class);
>>
>>                         job.setMapperClass(TokenizerMapper.class);
>>
>>                         job.setCombinerClass(IntSumReducer.class);
>>
>>                         job.setReducerClass(IntSumReducer.class);
>>
>>                         job.setOutputKeyClass(Text.class);
>>
>>                         job.setOutputValueClass(IntWritable.class);
>>
>>                         FileInputFormat.addInputPath(job, inPath);
>>
>>                         FileOutputFormat.setOutputPath(job, outPath);
>>
>>
>>
>>                         // this waits until the job completes
>>
>>                         job.waitForCompletion(true);
>>
>>
>>
>>                         if (job.isSuccessful()) {
>>
>>                               System.out.println("Job completed
>> successfully");
>>
>>                         } else {
>>
>>                               System.out.println("Job Failed");
>>
>>                         }
>>
>>                         return UserGroupInformation.getCurrentUser();
>>
>>
>>
>>                   }
>>
>>             });
>>
>>
>>
>> When the above code is executed, I get the below exception on the line
>> mentioned in the code above:
>>
>> ***************
>>
>> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
>> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
>> (auth:SIMPLE)
>> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> Exception in thread "Main Thread"
>> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
>> Authorization (hadoop.security.authorization) is enabled but
>> authentication
>> (hadoop.security.authentication) is configured as simple. Please
>> configure another method like kerberos or digest.
>>
>> ***************
>>
>> Can someone tell me/point me in the right direction on what is going
>> on here, and how do i get over this exception? Any help will be
>> greatly appreciated. thanks!
>>
>>
>>
>> Below are the hadoop cluster configuration files:
>>
>>
>>
>> ***************
>>
>> Core-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>fs.defaultFS</name>
>>
>>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.file.buffer.size</name>
>>
>>     <value>65536</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.compression.codecs</name>
>>
>>     <value></value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.authentication</name>
>>
>>     <value>kerberos</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>hadoop.security.auth_to_local</name>
>>
>>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>>
>> DEFAULT</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Hdfs-site.xml
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>dfs.https.address</name>
>>
>>     <value>xxxxx.yyyy.com:50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.https.port</name>
>>
>>     <value>50470</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.http-address</name>
>>
>>     <value>xxxxx.yyyy.com:50070</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.replication</name>
>>
>>     <value>3</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.blocksize</name>
>>
>>     <value>134217728</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.client.use.datanode.hostname</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.block.access.token.enable</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.principal</name>
>>
>>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>>
>>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> Mapred-site.xml
>>
>>
>>
>>
>>
>> <?xml version="1.0" encoding="UTF-8"?>
>>
>>
>>
>> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>>
>> <configuration>
>>
>>   <property>
>>
>>     <name>mapred.job.tracker</name>
>>
>>     <value>abcde.yyyy.com:9921</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compress</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.type</name>
>>
>>     <value>BLOCK</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.output.compression.codec</name>
>>
>>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.compress.map.output</name>
>>
>>     <value>true</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.factor</name>
>>
>>     <value>64</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.record.percent</name>
>>
>>     <value>0.05</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.spill.percent</name>
>>
>>     <value>0.8</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.parallel.copies</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.submit.replication</name>
>>
>>     <value>10</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks</name>
>>
>>     <value>72</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>io.sort.mb</name>
>>
>>     <value>256</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.child.java.opts</name>
>>
>>     <value> -Xmx1073741824</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.job.reuse.jvm.num.tasks</name>
>>
>>     <value>1</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.map.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.tasks.speculative.execution</name>
>>
>>     <value>false</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapred.reduce.slowstart.completed.maps</name>
>>
>>     <value>1.0</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.principal</name>
>>
>>     <value>mapred/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>>   <property>
>>
>>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>>
>>     <value>host/_HOST@RND.HDFS.COM</value>
>>
>>   </property>
>>
>> </configuration>
>>
>>
>>
>>
>>
>> ***************
>>
>>
>>
>> ________________________________
>> This message, and any attachments, is for the intended recipient(s)
>> only, may contain information that is privileged, confidential and/or
>> proprietary and subject to important terms and conditions available at
>> http://www.bankofamerica.com/emaildisclaimer. If you are not the
>> intended recipient, please delete this message.
>
>
>
> --
> Harsh J
>
> ----------------------------------------------------------------------
> This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.



-- 
Harsh J

RE: submitting a mapreduce job to remote cluster

Posted by "Erravelli, Venkat" <ve...@baml.com>.
Tried the below:

conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.

UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it fails on this line with the below exception:


Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
	at com.ard.WordCountJob.process2(WordCountJob.java:147)
	at com.ard.WordCountJob.main(WordCountJob.java:198)
Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
	at com.ard.WordCountJob.process2(WordCountJob.java:142)
	at com.ard.WordCountJob.main(WordCountJob.java:197)
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
	at com.ard.WordCountJob.process2(WordCountJob.java:147)
	at com.ard.WordCountJob.main(WordCountJob.java:198)
Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
	at sun.security.krb5.Config.<init>(Config.java:147)
	at sun.security.krb5.Config.getInstance(Config.java:79)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
	at com.ard.WordCountJob.process2(WordCountJob.java:142)
	at com.ard.WordCountJob.main(WordCountJob.java:197)
Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
	at java.io.FileInputStream.open(Native Method)
	at java.io.FileInputStream.<init>(FileInputStream.java:106)
	at java.io.FileInputStream.<init>(FileInputStream.java:66)
	at sun.security.krb5.Config$1.run(Config.java:539)
	at sun.security.krb5.Config.loadConfigFile(Config.java:535)
	at sun.security.krb5.Config.<init>(Config.java:144)
	... 11 more
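
(For reference, the file it cannot find is a plain kerberos client config; a
minimal krb5.ini is sketched below. The KDC host is a placeholder - only the
RND.HDFS.COM realm comes from our cluster configs - so this is illustrative,
not our actual file.)

    [libdefaults]
        default_realm = RND.HDFS.COM

    [realms]
        RND.HDFS.COM = {
            kdc = kdc.example.com
        }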

-----Original Message-----
From: Harsh J [mailto:harsh@cloudera.com] 
Sent: Wednesday, November 28, 2012 11:35 AM
To: <us...@hadoop.apache.org>
Subject: Re: submitting a mapreduce job to remote cluster

Are you positive that your cluster/client configuration files'
directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.

Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.
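
A rough sketch of the first suggestion, with placeholder local paths standing
in for copies of the cluster's *-site.xml files, so their values (including
hadoop.security.authentication) are read directly:

    // Load the cluster config files explicitly rather than relying on the
    // classpath; the paths below are placeholders for wherever the copied
    // files actually live on the client machine.
    Configuration conf = new Configuration();
    conf.addResource(new Path("C:/hadoop-conf/core-site.xml"));
    conf.addResource(new Path("C:/hadoop-conf/hdfs-site.xml"));
    conf.addResource(new Path("C:/hadoop-conf/mapred-site.xml"));
    UserGroupInformation.setConfiguration(conf);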

On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
> Hello :
>
>
>
> I see the below exception when I submit a MapReduce Job from 
> standalone java application to a remote Hadoop cluster. Cluster 
> authentication mechanism is Kerberos.
>
>
>
> Below is the code. I am using user impersonation since I need to 
> submit the job as a hadoop cluster user (userx) from my machine, on 
> which I am logged is as user99. So:
>
>
>
> userx -- user that is setup on the hadoop cluster.
>
> user99 -- user on whoes machine the standalone java application code 
> is executing.
>
>
>
>                     System.setProperty("HADOOP_USER_NAME", "userx");
>
>
>
>             final Configuration conf = new Configuration();
>
>
>
>             conf.set("hadoop.security.auth_to_local",
>
>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>
>                                     +
> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>
>
>
>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>
>
>
>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>
>
>
>             UserGroupInformation.setConfiguration(conf);
>
>
>
>             System.out.println("here ::::: "+ 
> UserGroupInformation.getCurrentUser());
>
>
>
> UserGroupInformation ugi = 
> UserGroupInformation.createProxyUser("user99",
> UserGroupInformation.getCurrentUser());
>
>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>
>             ugi.setAuthenticationMethod(am);
>
>
>
>
>
>             final Path inPath = new Path("/user/userx/test.txt");
>
>
>
>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>
>             StringBuilder sb = new StringBuilder();
>
>             sb.append("wordcount_result_").append(df.format(new 
> Date()));
>
>
>
>             // out
>
>             final Path outPath = new Path(sb.toString());
>
>
>
>             ugi.doAs(new 
> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>
>
>
>                   public UserGroupInformation run() throws Exception {
>
>                         // Submit a job
>
>                         // create a new job based on the configuration
>
>                         Job job = new Job(conf, "word count remote");
>
>
>
>                         job.setJarByClass(WordCountJob.class);
>
>                         job.setMapperClass(TokenizerMapper.class);
>
>                         job.setCombinerClass(IntSumReducer.class);
>
>                         job.setReducerClass(IntSumReducer.class);
>
>                         job.setOutputKeyClass(Text.class);
>
>                         job.setOutputValueClass(IntWritable.class);
>
>                         FileInputFormat.addInputPath(job, inPath);
>
>                         FileOutputFormat.setOutputPath(job, outPath);
>
>
>
>                         // this waits until the job completes
>
>                         job.waitForCompletion(true);
>
>
>
>                         if (job.isSuccessful()) {
>
>                               System.out.println("Job completed 
> successfully");
>
>                         } else {
>
>                               System.out.println("Job Failed");
>
>                         }
>
>                         return UserGroupInformation.getCurrentUser();
>
>
>
>                   }
>
>             });
>
>
>
> When the above code is executed, I get the below exception on the line 
> mentioned in the code above:
>
> ***************
>
> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
> (auth:SIMPLE)
> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but 
> authentication
> (hadoop.security.authentication) is configured as simple. Please 
> configure another method like kerberos or digest.
>
> Exception in thread "Main Thread"
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but 
> authentication
> (hadoop.security.authentication) is configured as simple. Please 
> configure another method like kerberos or digest.
>
> ***************
>
> Can someone tell me/point me in the right direction on what is going 
> on here, and how do i get over this exception? Any help will be 
> greatly appreciated. thanks!
>
>
>
> Below are the hadoop cluster configuration files:
>
>
>
> ***************
>
> Core-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>fs.defaultFS</name>
>
>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>
>   </property>
>
>   <property>
>
>     <name>io.file.buffer.size</name>
>
>     <value>65536</value>
>
>   </property>
>
>   <property>
>
>     <name>io.compression.codecs</name>
>
>     <value></value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.authentication</name>
>
>     <value>kerberos</value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.auth_to_local</name>
>
>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> DEFAULT</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Hdfs-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>
> <configuration>
>
>   <property>
>
>     <name>dfs.https.address</name>
>
>     <value>xxxxx.yyyy.com:50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.https.port</name>
>
>     <value>50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.http-address</name>
>
>     <value>xxxxx.yyyy.com:50070</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.replication</name>
>
>     <value>3</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.blocksize</name>
>
>     <value>134217728</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.client.use.datanode.hostname</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.block.access.token.enable</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.principal</name>
>
>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>
>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Mapred-site.xml
>
>
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>mapred.job.tracker</name>
>
>     <value>abcde.yyyy.com:9921</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compress</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.type</name>
>
>     <value>BLOCK</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.compress.map.output</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.factor</name>
>
>     <value>64</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.record.percent</name>
>
>     <value>0.05</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.spill.percent</name>
>
>     <value>0.8</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.parallel.copies</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.submit.replication</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks</name>
>
>     <value>72</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.mb</name>
>
>     <value>256</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.child.java.opts</name>
>
>     <value> -Xmx1073741824</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.job.reuse.jvm.num.tasks</name>
>
>     <value>1</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.slowstart.completed.maps</name>
>
>     <value>1.0</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.principal</name>
>
>     <value>mapred/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> ***************
>
>
>
> ________________________________
> This message, and any attachments, is for the intended recipient(s) 
> only, may contain information that is privileged, confidential and/or 
> proprietary and subject to important terms and conditions available at 
> http://www.bankofamerica.com/emaildisclaimer. If you are not the 
> intended recipient, please delete this message.



--
Harsh J

----------------------------------------------------------------------
This message, and any attachments, is for the intended recipient(s) only, may contain information that is privileged, confidential and/or proprietary and subject to important terms and conditions available at http://www.bankofamerica.com/emaildisclaimer.   If you are not the intended recipient, please delete this message.

> intended recipient, please delete this message.



--
Harsh J


RE: submitting a mapreduce job to remote cluster

Posted by "Erravelli, Venkat" <ve...@baml.com>.
Tried the below:

conf.set("hadoop.security.authentication", "kerberos");  >>>>>>>>  Added this line.
		

UserGroupInformation.setConfiguration(conf);  <<<<<<<<<<<<< Now, it fails on this line with the below exception


Exception in thread "Main Thread" java.lang.ExceptionInInitializerError
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
	at com.ard.WordCountJob.process2(WordCountJob.java:147)
	at com.ard.WordCountJob.main(WordCountJob.java:198)
Caused by: java.lang.IllegalArgumentException: Can't get Kerberos configuration
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:44)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
	at com.ard.WordCountJob.process2(WordCountJob.java:142)
	at com.ard.WordCountJob.main(WordCountJob.java:197)
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:268)
	at com.ard.WordCountJob.process2(WordCountJob.java:147)
	at com.ard.WordCountJob.main(WordCountJob.java:198)
Caused by: KrbException: Could not load configuration file C:\WINNT\krb5.ini (The system cannot find the file specified)
	at sun.security.krb5.Config.<init>(Config.java:147)
	at sun.security.krb5.Config.getInstance(Config.java:79)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:63)
	at org.apache.hadoop.security.HadoopKerberosName.<clinit>(HadoopKerberosName.java:41)
	at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:227)
	at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:267)
	at com.ard.WordCountJob.process2(WordCountJob.java:142)
	at com.ard.WordCountJob.main(WordCountJob.java:197)
Caused by: java.io.FileNotFoundException: C:\WINNT\krb5.ini (The system cannot find the file specified)
	at java.io.FileInputStream.open(Native Method)
	at java.io.FileInputStream.<init>(FileInputStream.java:106)
	at java.io.FileInputStream.<init>(FileInputStream.java:66)
	at sun.security.krb5.Config$1.run(Config.java:539)
	at sun.security.krb5.Config.loadConfigFile(Config.java:535)
	at sun.security.krb5.Config.<init>(Config.java:144)
	... 11 more
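
The root cause at the bottom of that trace is simply that the JVM has no Kerberos configuration to read. One rough sketch of supplying it before UserGroupInformation.setConfiguration(conf) runs -- either point the JVM at an existing krb5.ini/krb5.conf, or give it the realm and KDC directly. The file path and KDC hostname below are placeholders, not values from this thread; the realm is just copied from the principals in the posted configs:

    // Option 1: point the JVM at an existing krb5.ini / krb5.conf
    // (placeholder path -- use wherever the Kerberos config actually lives)
    System.setProperty("java.security.krb5.conf", "C:\\kerberos\\krb5.ini");

    // Option 2: give the realm and KDC directly (the KDC hostname is made up)
    System.setProperty("java.security.krb5.realm", "RND.HDFS.COM");
    System.setProperty("java.security.krb5.kdc", "kdc.rnd.hdfs.com");

    // Only once the JVM can resolve Kerberos settings:
    conf.set("hadoop.security.authentication", "kerberos");
    UserGroupInformation.setConfiguration(conf);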

-----Original Message-----
From: Harsh J [mailto:harsh@cloudera.com] 
Sent: Wednesday, November 28, 2012 11:35 AM
To: <us...@hadoop.apache.org>
Subject: Re: submitting a mapreduce job to remote cluster

Are you positive that your cluster/client configuration files'
directory is on the classpath when you run this job? Only then its values would get automatically read when you instantiate the Configuration class.

Alternatively, you may try to set: "hadoop.security.authentication" to "kerberos" manually in your Configuration (conf) object.

On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat <ve...@baml.com> wrote:
> Hello :
>
>
>
> I see the below exception when I submit a MapReduce Job from 
> standalone java application to a remote Hadoop cluster. Cluster 
> authentication mechanism is Kerberos.
>
>
>
> Below is the code. I am using user impersonation since I need to 
> submit the job as a hadoop cluster user (userx) from my machine, on 
> which I am logged is as user99. So:
>
>
>
> userx -- user that is setup on the hadoop cluster.
>
> user99 -- user on whoes machine the standalone java application code 
> is executing.
>
>
>
>                     System.setProperty("HADOOP_USER_NAME", "userx");
>
>
>
>             final Configuration conf = new Configuration();
>
>
>
>             conf.set("hadoop.security.auth_to_local",
>
>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>
>                                     +
> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>
>
>
>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>
>
>
>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>
>
>
>             UserGroupInformation.setConfiguration(conf);
>
>
>
>             System.out.println("here ::::: "+ 
> UserGroupInformation.getCurrentUser());
>
>
>
> UserGroupInformation ugi = 
> UserGroupInformation.createProxyUser("user99",
> UserGroupInformation.getCurrentUser());
>
>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>
>             ugi.setAuthenticationMethod(am);
>
>
>
>
>
>             final Path inPath = new Path("/user/userx/test.txt");
>
>
>
>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>
>             StringBuilder sb = new StringBuilder();
>
>             sb.append("wordcount_result_").append(df.format(new 
> Date()));
>
>
>
>             // out
>
>             final Path outPath = new Path(sb.toString());
>
>
>
>             ugi.doAs(new 
> PrivilegedExceptionAction<UserGroupInformation>() { <<<<---------throws exception here!!!
>
>
>
>                   public UserGroupInformation run() throws Exception {
>
>                         // Submit a job
>
>                         // create a new job based on the configuration
>
>                         Job job = new Job(conf, "word count remote");
>
>
>
>                         job.setJarByClass(WordCountJob.class);
>
>                         job.setMapperClass(TokenizerMapper.class);
>
>                         job.setCombinerClass(IntSumReducer.class);
>
>                         job.setReducerClass(IntSumReducer.class);
>
>                         job.setOutputKeyClass(Text.class);
>
>                         job.setOutputValueClass(IntWritable.class);
>
>                         FileInputFormat.addInputPath(job, inPath);
>
>                         FileOutputFormat.setOutputPath(job, outPath);
>
>
>
>                         // this waits until the job completes
>
>                         job.waitForCompletion(true);
>
>
>
>                         if (job.isSuccessful()) {
>
>                               System.out.println("Job completed 
> successfully");
>
>                         } else {
>
>                               System.out.println("Job Failed");
>
>                         }
>
>                         return UserGroupInformation.getCurrentUser();
>
>
>
>                   }
>
>             });
>
>
>
> When the above code is executed, I get the below exception on the line 
> mentioned in the code above:
>
> ***************
>
> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
> (auth:SIMPLE)
> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but 
> authentication
> (hadoop.security.authentication) is configured as simple. Please 
> configure another method like kerberos or digest.
>
> Exception in thread "Main Thread"
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but 
> authentication
> (hadoop.security.authentication) is configured as simple. Please 
> configure another method like kerberos or digest.
>
> ***************
>
> Can someone tell me/point me in the right direction on what is going 
> on here, and how do i get over this exception? Any help will be 
> greatly appreciated. thanks!
>
>
>
> Below are the hadoop cluster configuration files:
>
>
>
> ***************
>
> Core-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>fs.defaultFS</name>
>
>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>
>   </property>
>
>   <property>
>
>     <name>io.file.buffer.size</name>
>
>     <value>65536</value>
>
>   </property>
>
>   <property>
>
>     <name>io.compression.codecs</name>
>
>     <value></value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.authentication</name>
>
>     <value>kerberos</value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.auth_to_local</name>
>
>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> DEFAULT</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Hdfs-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>
> <configuration>
>
>   <property>
>
>     <name>dfs.https.address</name>
>
>     <value>xxxxx.yyyy.com:50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.https.port</name>
>
>     <value>50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.http-address</name>
>
>     <value>xxxxx.yyyy.com:50070</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.replication</name>
>
>     <value>3</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.blocksize</name>
>
>     <value>134217728</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.client.use.datanode.hostname</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.block.access.token.enable</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.principal</name>
>
>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>
>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Mapred-site.xml
>
>
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>mapred.job.tracker</name>
>
>     <value>abcde.yyyy.com:9921</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compress</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.type</name>
>
>     <value>BLOCK</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.compress.map.output</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.factor</name>
>
>     <value>64</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.record.percent</name>
>
>     <value>0.05</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.spill.percent</name>
>
>     <value>0.8</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.parallel.copies</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.submit.replication</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks</name>
>
>     <value>72</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.mb</name>
>
>     <value>256</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.child.java.opts</name>
>
>     <value> -Xmx1073741824</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.job.reuse.jvm.num.tasks</name>
>
>     <value>1</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.slowstart.completed.maps</name>
>
>     <value>1.0</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.principal</name>
>
>     <value>mapred/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> ***************
>
>
>
> ________________________________
> This message, and any attachments, is for the intended recipient(s) 
> only, may contain information that is privileged, confidential and/or 
> proprietary and subject to important terms and conditions available at 
> http://www.bankofamerica.com/emaildisclaimer. If you are not the 
> intended recipient, please delete this message.



--
Harsh J


Re: submitting a mapreduce job to remote cluster

Posted by Harsh J <ha...@cloudera.com>.
Are you positive that your cluster/client configuration files'
directory is on the classpath when you run this job? Only then its
values would get automatically read when you instantiate the
Configuration class.
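
If putting that directory on the classpath is not convenient, roughly the same effect can be had by adding client-side copies of the cluster files explicitly. A sketch, assuming the copies live under /etc/hadoop/conf (the path is only an example):

    // Configuration is org.apache.hadoop.conf.Configuration,
    // Path is org.apache.hadoop.fs.Path
    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"));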

Alternatively, you may try to set: "hadoop.security.authentication" to
"kerberos" manually in your Configuration (conf) object.

On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat
<ve...@baml.com> wrote:
> Hello :
>
>
>
> I see the below exception when I submit a MapReduce Job from standalone java
> application to a remote Hadoop cluster. Cluster authentication mechanism is
> Kerberos.
>
>
>
> Below is the code. I am using user impersonation since I need to submit the
> job as a hadoop cluster user (userx) from my machine, on which I am logged
> is as user99. So:
>
>
>
> userx -- user that is setup on the hadoop cluster.
>
> user99 -- user on whoes machine the standalone java application code is
> executing.
>
>
>
>                     System.setProperty("HADOOP_USER_NAME", "userx");
>
>
>
>             final Configuration conf = new Configuration();
>
>
>
>             conf.set("hadoop.security.auth_to_local",
>
>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>
>                                     +
> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>
>
>
>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>
>
>
>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>
>
>
>             UserGroupInformation.setConfiguration(conf);
>
>
>
>             System.out.println("here ::::: "+
> UserGroupInformation.getCurrentUser());
>
>
>
> UserGroupInformation ugi = UserGroupInformation.createProxyUser("user99",
> UserGroupInformation.getCurrentUser());
>
>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>
>             ugi.setAuthenticationMethod(am);
>
>
>
>
>
>             final Path inPath = new Path("/user/userx/test.txt");
>
>
>
>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>
>             StringBuilder sb = new StringBuilder();
>
>             sb.append("wordcount_result_").append(df.format(new Date()));
>
>
>
>             // out
>
>             final Path outPath = new Path(sb.toString());
>
>
>
>             ugi.doAs(new PrivilegedExceptionAction<UserGroupInformation>() {
> <<<<---------throws exception here!!!
>
>
>
>                   public UserGroupInformation run() throws Exception {
>
>                         // Submit a job
>
>                         // create a new job based on the configuration
>
>                         Job job = new Job(conf, "word count remote");
>
>
>
>                         job.setJarByClass(WordCountJob.class);
>
>                         job.setMapperClass(TokenizerMapper.class);
>
>                         job.setCombinerClass(IntSumReducer.class);
>
>                         job.setReducerClass(IntSumReducer.class);
>
>                         job.setOutputKeyClass(Text.class);
>
>                         job.setOutputValueClass(IntWritable.class);
>
>                         FileInputFormat.addInputPath(job, inPath);
>
>                         FileOutputFormat.setOutputPath(job, outPath);
>
>
>
>                         // this waits until the job completes
>
>                         job.waitForCompletion(true);
>
>
>
>                         if (job.isSuccessful()) {
>
>                               System.out.println("Job completed
> successfully");
>
>                         } else {
>
>                               System.out.println("Job Failed");
>
>                         }
>
>                         return UserGroupInformation.getCurrentUser();
>
>
>
>                   }
>
>             });
>
>
>
> When the above code is executed, I get the below exception on the line
> mentioned in the code above:
>
> ***************
>
> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
> PriviledgedActionException as: user99 (auth:KERBEROS) via userx
> (auth:SIMPLE)
> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but authentication
> (hadoop.security.authentication) is configured as simple. Please configure
> another method like kerberos or digest.
>
> Exception in thread "Main Thread"
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but authentication
> (hadoop.security.authentication) is configured as simple. Please configure
> another method like kerberos or digest.
>
> ***************
>
> Can someone tell me/point me in the right direction on what is going on
> here, and how do i get over this exception? Any help will be greatly
> appreciated. thanks!
>
>
>
> Below are the hadoop cluster configuration files:
>
>
>
> ***************
>
> Core-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>fs.defaultFS</name>
>
>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>
>   </property>
>
>   <property>
>
>     <name>io.file.buffer.size</name>
>
>     <value>65536</value>
>
>   </property>
>
>   <property>
>
>     <name>io.compression.codecs</name>
>
>     <value></value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.authentication</name>
>
>     <value>kerberos</value>
>
>   </property>
>
>   <property>
>
>     <name>hadoop.security.auth_to_local</name>
>
>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
>
> DEFAULT</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Hdfs-site.xml
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
>
> <configuration>
>
>   <property>
>
>     <name>dfs.https.address</name>
>
>     <value>xxxxx.yyyy.com:50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.https.port</name>
>
>     <value>50470</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.http-address</name>
>
>     <value>xxxxx.yyyy.com:50070</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.replication</name>
>
>     <value>3</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.blocksize</name>
>
>     <value>134217728</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.client.use.datanode.hostname</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.block.access.token.enable</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.principal</name>
>
>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>
>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> Mapred-site.xml
>
>
>
>
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
>
> <configuration>
>
>   <property>
>
>     <name>mapred.job.tracker</name>
>
>     <value>abcde.yyyy.com:9921</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compress</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.type</name>
>
>     <value>BLOCK</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.output.compression.codec</name>
>
>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.compress.map.output</name>
>
>     <value>true</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.factor</name>
>
>     <value>64</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.record.percent</name>
>
>     <value>0.05</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.spill.percent</name>
>
>     <value>0.8</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.parallel.copies</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.submit.replication</name>
>
>     <value>10</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks</name>
>
>     <value>72</value>
>
>   </property>
>
>   <property>
>
>     <name>io.sort.mb</name>
>
>     <value>256</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.child.java.opts</name>
>
>     <value> -Xmx1073741824</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.job.reuse.jvm.num.tasks</name>
>
>     <value>1</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.map.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.tasks.speculative.execution</name>
>
>     <value>false</value>
>
>   </property>
>
>   <property>
>
>     <name>mapred.reduce.slowstart.completed.maps</name>
>
>     <value>1.0</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.principal</name>
>
>     <value>mapred/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
>   <property>
>
>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>
>     <value>host/_HOST@RND.HDFS.COM</value>
>
>   </property>
>
> </configuration>
>
>
>
>
>
> ***************
>
>
>
> ________________________________
> This message, and any attachments, is for the intended recipient(s) only,
> may contain information that is privileged, confidential and/or proprietary
> and subject to important terms and conditions available at
> http://www.bankofamerica.com/emaildisclaimer. If you are not the intended
> recipient, please delete this message.



-- 
Harsh J

Re: submitting a mapreduce job to remote cluster

Posted by Harsh J <ha...@cloudera.com>.
Are you positive that your cluster/client configuration files'
directory is on the classpath when you run this job? Only then its
values would get automatically read when you instantiate the
Configuration class.

Alternatively, you may try to set: "hadoop.security.authentication" to
"kerberos" manually in your Configuration (conf) object.

On Wed, Nov 28, 2012 at 9:23 PM, Erravelli, Venkat
<ve...@baml.com> wrote:
> Hello :
>
>
>
> I see the below exception when I submit a MapReduce Job from standalone java
> application to a remote Hadoop cluster. Cluster authentication mechanism is
> Kerberos.
>
>
>
> Below is the code. I am using user impersonation since I need to submit the
> job as a hadoop cluster user (userx) from my machine, on which I am logged
> is as user99. So:
>
>
>
> userx -- user that is setup on the hadoop cluster.
>
> user99 -- user on whoes machine the standalone java application code is
> executing.
>
>
>
>                     System.setProperty("HADOOP_USER_NAME", "userx");
>
>
>
>             final Configuration conf = new Configuration();
>
>
>
>             conf.set("hadoop.security.auth_to_local",
>
>                         "RULE:[1:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//"
>
>                                     +
> "RULE:[2:$1@$0](.*@\\Q\\E$)s/@\\Q\\E$//" + "DEFAULT");
>
>
>
>             conf.set("mapred.job.tracker", "abcde.yyyy.com:9921");
>
>
>
>             conf.set("fs.defaultFS", "hdfs://xxxxx.yyyy.com:9920");
>
>
>
>             UserGroupInformation.setConfiguration(conf);
>
>
>
>             System.out.println("here ::::: "+
> UserGroupInformation.getCurrentUser());
>
>
>
> UserGroupInformation ugi = UserGroupInformation.createProxyUser("user99",
> UserGroupInformation.getCurrentUser());
>
>             AuthenticationMethod am = AuthenticationMethod.KERBEROS;
>
>             ugi.setAuthenticationMethod(am);
>
>
>
>
>
>             final Path inPath = new Path("/user/userx/test.txt");
>
>
>
>             DateFormat df = new SimpleDateFormat("dd_MM_yyyy_hh_mm");
>
>             StringBuilder sb = new StringBuilder();
>
>             sb.append("wordcount_result_").append(df.format(new Date()));
>
>
>
>             // out
>
>             final Path outPath = new Path(sb.toString());
>
>
>
>             ugi.doAs(new PrivilegedExceptionAction<UserGroupInformation>() {
> <<<<---------throws exception here!!!
>
>
>
>                   public UserGroupInformation run() throws Exception {
>
>                         // Submit a job
>
>                         // create a new job based on the configuration
>
>                         Job job = new Job(conf, "word count remote");
>
>
>
>                         job.setJarByClass(WordCountJob.class);
>
>                         job.setMapperClass(TokenizerMapper.class);
>
>                         job.setCombinerClass(IntSumReducer.class);
>
>                         job.setReducerClass(IntSumReducer.class);
>
>                         job.setOutputKeyClass(Text.class);
>
>                         job.setOutputValueClass(IntWritable.class);
>
>                         FileInputFormat.addInputPath(job, inPath);
>
>                         FileOutputFormat.setOutputPath(job, outPath);
>
>
>
>                         // this waits until the job completes
>
>                         job.waitForCompletion(true);
>
>
>
>                         if (job.isSuccessful()) {
>
>                               System.out.println("Job completed
> successfully");
>
>                         } else {
>
>                               System.out.println("Job Failed");
>
>                         }
>
>                         return UserGroupInformation.getCurrentUser();
>
>
>
>                   }
>
>             });
>
>
>
> When the above code is executed, I get the below exception on the line
> mentioned in the code above:
>
> ***************
> 12/11/28 09:43:51 ERROR security.UserGroupInformation:
> PriviledgedActionException as: user99 (auth:KERBEROS) via userx (auth:SIMPLE)
> cause:org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but authentication
> (hadoop.security.authentication) is configured as simple. Please configure
> another method like kerberos or digest.
>
> Exception in thread "Main Thread"
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
> Authorization (hadoop.security.authorization) is enabled but authentication
> (hadoop.security.authentication) is configured as simple. Please configure
> another method like kerberos or digest.
> ***************
>
> Can someone tell me/point me in the right direction on what is going on
> here, and how do I get past this exception? Any help will be greatly
> appreciated. Thanks!
>
> Below are the Hadoop cluster configuration files:
>
> ***************
>
> core-site.xml
>
> <?xml version="1.0" encoding="UTF-8"?>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
> <configuration>
>   <property>
>     <name>fs.defaultFS</name>
>     <value>hdfs://xxxxx.yyyy.com:9920</value>
>   </property>
>   <property>
>     <name>io.file.buffer.size</name>
>     <value>65536</value>
>   </property>
>   <property>
>     <name>io.compression.codecs</name>
>     <value></value>
>   </property>
>   <property>
>     <name>hadoop.security.authentication</name>
>     <value>kerberos</value>
>   </property>
>   <property>
>     <name>hadoop.security.auth_to_local</name>
>     <value>RULE:[1:$1@$0](.*@\Q\E$)s/@\Q\E$//
> RULE:[2:$1@$0](.*@\Q\E$)s/@\Q\E$//
> DEFAULT</value>
>   </property>
> </configuration>
>
> hdfs-site.xml
>
> <?xml version="1.0" encoding="UTF-8"?>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.467Z-->
> <configuration>
>   <property>
>     <name>dfs.https.address</name>
>     <value>xxxxx.yyyy.com:50470</value>
>   </property>
>   <property>
>     <name>dfs.https.port</name>
>     <value>50470</value>
>   </property>
>   <property>
>     <name>dfs.namenode.http-address</name>
>     <value>xxxxx.yyyy.com:50070</value>
>   </property>
>   <property>
>     <name>dfs.replication</name>
>     <value>3</value>
>   </property>
>   <property>
>     <name>dfs.blocksize</name>
>     <value>134217728</value>
>   </property>
>   <property>
>     <name>dfs.client.use.datanode.hostname</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>dfs.block.access.token.enable</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>dfs.namenode.kerberos.principal</name>
>     <value>hdfs/_HOST@RND.HDFS.COM</value>
>   </property>
>   <property>
>     <name>dfs.namenode.kerberos.https.principal</name>
>     <value>host/_HOST@RND.HDFS.COM</value>
>   </property>
>   <property>
>     <name>dfs.namenode.kerberos.internal.spnego.principal</name>
>     <value>HTTP/_HOST@RND.HDFS.COM</value>
>   </property>
> </configuration>
>
> mapred-site.xml
>
> <?xml version="1.0" encoding="UTF-8"?>
> <!--Autogenerated by Cloudera CM on 2012-11-06T20:18:31.456Z-->
> <configuration>
>   <property>
>     <name>mapred.job.tracker</name>
>     <value>abcde.yyyy.com:9921</value>
>   </property>
>   <property>
>     <name>mapred.output.compress</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapred.output.compression.type</name>
>     <value>BLOCK</value>
>   </property>
>   <property>
>     <name>mapred.output.compression.codec</name>
>     <value>org.apache.hadoop.io.compress.DefaultCodec</value>
>   </property>
>   <property>
>     <name>mapred.map.output.compression.codec</name>
>     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
>   </property>
>   <property>
>     <name>mapred.compress.map.output</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>io.sort.factor</name>
>     <value>64</value>
>   </property>
>   <property>
>     <name>io.sort.record.percent</name>
>     <value>0.05</value>
>   </property>
>   <property>
>     <name>io.sort.spill.percent</name>
>     <value>0.8</value>
>   </property>
>   <property>
>     <name>mapred.reduce.parallel.copies</name>
>     <value>10</value>
>   </property>
>   <property>
>     <name>mapred.submit.replication</name>
>     <value>10</value>
>   </property>
>   <property>
>     <name>mapred.reduce.tasks</name>
>     <value>72</value>
>   </property>
>   <property>
>     <name>io.sort.mb</name>
>     <value>256</value>
>   </property>
>   <property>
>     <name>mapred.child.java.opts</name>
>     <value> -Xmx1073741824</value>
>   </property>
>   <property>
>     <name>mapred.job.reuse.jvm.num.tasks</name>
>     <value>1</value>
>   </property>
>   <property>
>     <name>mapred.map.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapred.reduce.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapred.reduce.slowstart.completed.maps</name>
>     <value>1.0</value>
>   </property>
>   <property>
>     <name>mapreduce.jobtracker.kerberos.principal</name>
>     <value>mapred/_HOST@RND.HDFS.COM</value>
>   </property>
>   <property>
>     <name>mapreduce.jobtracker.kerberos.https.principal</name>
>     <value>host/_HOST@RND.HDFS.COM</value>
>   </property>
> </configuration>
>
> ***************



-- 
Harsh J

Re: submitting a mapreduce job to remote cluster

Posted by Harsh J <ha...@cloudera.com>.
Are you positive that your cluster/client configuration files' directory
is on the classpath when you run this job? Only then would its values be
read automatically when you instantiate the Configuration class.

Alternatively, you may try setting "hadoop.security.authentication" to
"kerberos" manually in your Configuration (conf) object.




-- 
Harsh J