Posted to user@spark.apache.org by Sudhir Jangir <su...@infoobjects.com> on 2017/05/24 17:07:18 UTC

One question / kerberos, yarn-cluster -> connection to hbase

We are facing an issue with a Kerberos-enabled Hadoop/CDH cluster.

We are trying to run a streaming job on yarn-cluster that interacts with Kafka (direct stream) and HBase.

Somehow, we are not able to connect to HBase in cluster mode. We use a keytab to log in to HBase.

This is what we do:

spark-submit --master yarn-cluster \
  --keytab "dev.keytab" \
  --principal "dev@IO-INT.COM" \
  --conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j_executor_conf.properties -XX:+UseG1GC" \
  --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j_driver_conf.properties -XX:+UseG1GC" \
  --conf spark.yarn.stagingDir=hdfs:///tmp/spark/ \
  --files "job.properties,log4j_driver_conf.properties,log4j_executor_conf.properties" \
  service-0.0.1-SNAPSHOT.jar job.properties

To connect to HBase:

  import org.apache.hadoop.hbase.HBaseConfiguration
  import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}
  import org.apache.hadoop.security.UserGroupInformation
  import org.apache.spark.SparkFiles

  def getHbaseConnection(properties: SerializedProperties): (Connection, UserGroupInformation) = {
    val config = HBaseConfiguration.create()
    config.set("hbase.zookeeper.quorum", HBASE_ZOOKEEPER_QUORUM_VALUE)
    config.set("hbase.zookeeper.property.clientPort", "2181") // set() takes strings, not ints
    config.set("hadoop.security.authentication", "kerberos")
    config.set("hbase.security.authentication", "kerberos")
    config.set("hbase.cluster.distributed", "true")
    config.set("hbase.rpc.protection", "privacy")
    config.set("hbase.regionserver.kerberos.principal", "hbase/_HOST@IO-INT.COM")
    config.set("hbase.master.kerberos.principal", "hbase/_HOST@IO-INT.COM")

    UserGroupInformation.setConfiguration(config)

    // Log in from the keytab: prefer the copy shipped to the executors via
    // SparkFiles, and fall back to the local path from the job properties.
    var ugi: UserGroupInformation = null
    if (SparkFiles.get(properties.keytab) != null
      && new java.io.File(SparkFiles.get(properties.keytab)).exists) {
      ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
        properties.kerberosPrincipal, SparkFiles.get(properties.keytab))
    } else {
      ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
        properties.kerberosPrincipal, properties.keytab)
    }

    val connection = ConnectionFactory.createConnection(config)
    (connection, ugi)
  }
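
One thing we are not sure about: loginUserFromKeytabAndReturnUGI returns a UGI but does not switch the process-wide login user, so the createConnection call above may still authenticate as the original login user. A minimal sketch of what we think the doAs wrapping would look like, reusing config and ugi from the method above:

  import java.security.PrivilegedExceptionAction

  // Create the connection inside ugi.doAs so the HBase RPCs authenticate
  // with the keytab identity instead of the process login user.
  val connection = ugi.doAs(new PrivilegedExceptionAction[Connection] {
    override def run(): Connection = ConnectionFactory.createConnection(config)
  })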

 

And we connect to HBase like this:

 ….foreachRDD { rdd =>
      if (!rdd.isEmpty()) {
        // var ugi: UserGroupInformation = Utils.getHbaseConnection(properties)._2
        rdd.foreachPartition { partition =>
          // one HBase connection per partition
          val connection = Utils.getHbaseConnection(propsObj)._1
          val table = …
          partition.foreach { json =>
            … // build a Put per record and add it to puts
          }
          table.put(puts)
          table.close()
          connection.close()
        }
      }
    }
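
For reference, the elided loop accumulates a batch of Puts, roughly like the sketch below (the rowKeyFor helper, column family, and qualifier are hypothetical placeholders, not our real schema):

  import org.apache.hadoop.hbase.client.Put
  import org.apache.hadoop.hbase.util.Bytes

  // Hypothetical sketch of the elided body: one Put per JSON record.
  val puts = new java.util.ArrayList[Put]()
  partition.foreach { json =>
    val put = new Put(Bytes.toBytes(rowKeyFor(json))) // rowKeyFor is a made-up helper
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("payload"), Bytes.toBytes(json))
    puts.add(put)
  }
  table.put(puts) // Table.put(java.util.List[Put]) writes the whole batch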

 

 

The keytab file is not getting copied to the YARN staging/temp directory, so we are not getting it from SparkFiles.get…, and if we also pass the keytab with --files, spark-submit fails because the file is already listed via --keytab.
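
One workaround we are considering (a sketch, not yet verified; the dev-user.keytab name below is made up purely to dodge the duplicate-name check): ship a copy of the keytab under a different file name with --files, and have the executors read that copy:

  cp dev.keytab dev-user.keytab

  spark-submit --master yarn-cluster \
    --keytab "dev.keytab" --principal "dev@IO-INT.COM" \
    --files "job.properties,dev-user.keytab,log4j_driver_conf.properties,log4j_executor_conf.properties" \
    ... service-0.0.1-SNAPSHOT.jar job.properties

and in the job, point properties.keytab at "dev-user.keytab" so SparkFiles.get resolves the shipped copy.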

 

Thanks,

Sudhir


Re: One question / kerberos, yarn-cluster -> connection to hbase

Posted by Michael Gummelt <mg...@mesosphere.io>.
What version of Spark are you using?  Can you provide your logs with DEBUG
logging enabled?  You should see these logs:
https://github.com/apache/spark/blob/master/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala#L475
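
For example, something along these lines in your log4j properties should surface that logging (the exact logger granularity is an assumption on my part):

  log4j.logger.org.apache.spark.deploy.yarn.Client=DEBUG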

-- 
Michael Gummelt
Software Engineer
Mesosphere