You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/11/09 08:22:40 UTC
spark git commit: [SPARK-22463][YARN][SQL][HIVE] add hadoop/hive/hbase/etc configuration files in SPARK_CONF_DIR to distribute archive

Repository: spark
Updated Branches:
  refs/heads/master 98be55c0f -> c755b0d91


[SPARK-22463][YARN][SQL][HIVE] add hadoop/hive/hbase/etc configuration files in SPARK_CONF_DIR to distribute archive

## What changes were proposed in this pull request?
When I ran self contained sql apps, such as
```scala
import org.apache.spark.sql.SparkSession

object ShowHiveTables {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Show Hive Tables")
      .enableHiveSupport()
      .getOrCreate()
    spark.sql("show tables").show()
    spark.stop()
  }
}
```
with **yarn cluster** mode and `hive-site.xml` correctly within `$SPARK_HOME/conf`,they failed to connect the right hive metestore for not seeing hive-site.xml in AM/Driver's classpath.

Although submitting them with `--files/--jars local/path/to/hive-site.xml` or puting it to `$HADOOP_CONF_DIR/YARN_CONF_DIR` can make these apps works well in cluster mode as client mode, according to the official doc, see  http://spark.apache.org/docs/latest/sql-programming-guide.html#hive-tables
> Configuration of Hive is done by placing your hive-site.xml, core-site.xml (for security configuration), and hdfs-site.xml (for HDFS configuration) file in conf/.

We may respect these configuration files too or modify the doc for hive-tables in cluster mode.
## How was this patch tested?

cc cloud-fan gatorsmile

Author: Kent Yao <ya...@hotmail.com>

Closes #19663 from yaooqinn/SPARK-21888.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c755b0d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c755b0d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c755b0d9

Branch: refs/heads/master
Commit: c755b0d910d68e7921807f2f2ac1e3fac7a8f357
Parents: 98be55c
Author: Kent Yao <ya...@hotmail.com>
Authored: Thu Nov 9 09:22:33 2017 +0100
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Nov 9 09:22:33 2017 +0100

----------------------------------------------------------------------
 .../scala/org/apache/spark/deploy/yarn/Client.scala  | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c755b0d9/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 1fe25c4..99e7d46 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.deploy.yarn
 
-import java.io.{File, FileOutputStream, IOException, OutputStreamWriter}
+import java.io.{FileSystem => _, _}
 import java.net.{InetAddress, UnknownHostException, URI}
 import java.nio.ByteBuffer
 import java.nio.charset.StandardCharsets
@@ -687,6 +687,19 @@ private[spark] class Client(
   private def createConfArchive(): File = {
     val hadoopConfFiles = new HashMap[String, File]()
 
+    // SPARK_CONF_DIR shows up in the classpath before HADOOP_CONF_DIR/YARN_CONF_DIR
+    sys.env.get("SPARK_CONF_DIR").foreach { localConfDir =>
+      val dir = new File(localConfDir)
+      if (dir.isDirectory) {
+        val files = dir.listFiles(new FileFilter {
+          override def accept(pathname: File): Boolean = {
+            pathname.isFile && pathname.getName.endsWith(".xml")
+          }
+        })
+        files.foreach { f => hadoopConfFiles(f.getName) = f }
+      }
+    }
+
     Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey =>
       sys.env.get(envKey).foreach { path =>
         val dir = new File(path)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org