Posted to commits@spark.apache.org by ma...@apache.org on 2014/07/31 20:26:48 UTC

git commit: [SPARK-2397][SQL] Deprecate LocalHiveContext

Repository: spark
Updated Branches:
  refs/heads/master 3072b9602 -> 72cfb1398


[SPARK-2397][SQL] Deprecate LocalHiveContext

LocalHiveContext is redundant with HiveContext.  The only difference is that it creates `./metastore` instead of `./metastore_db`.
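
For migration, replacing the constructor call is usually all that is needed.  A minimal
sketch (hypothetical user code, not part of this commit):

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    val sc = new SparkContext(new SparkConf().setAppName("Migration example"))

    // Before (now deprecated): val hiveContext = new LocalHiveContext(sc)
    val hiveContext = new HiveContext(sc)

    // Caveat: without a hive-site.xml, the metastore is created in
    // ./metastore_db rather than the old ./metastore, so an existing local
    // metastore is not picked up automatically.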

Author: Michael Armbrust <mi...@databricks.com>

Closes #1641 from marmbrus/localHiveContext and squashes the following commits:

e5ec497 [Michael Armbrust] Add deprecation version
626e056 [Michael Armbrust] Don't remove from imports yet
905cc5f [Michael Armbrust] Merge remote-tracking branch 'apache/master' into localHiveContext
1c2727e [Michael Armbrust] Deprecate LocalHiveContext


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72cfb139
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72cfb139
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72cfb139

Branch: refs/heads/master
Commit: 72cfb13987bab07461266905930f84619b3a0068
Parents: 3072b96
Author: Michael Armbrust <mi...@databricks.com>
Authored: Thu Jul 31 11:26:43 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Thu Jul 31 11:26:43 2014 -0700

----------------------------------------------------------------------
 .../sbt_app_hive/src/main/scala/HiveApp.scala        |  4 ++--
 docs/sql-programming-guide.md                        |  6 +++---
 .../spark/examples/sql/hive/HiveFromSpark.scala      |  4 ++--
 python/pyspark/sql.py                                |  6 ++++++
 .../org/apache/spark/sql/hive/HiveContext.scala      |  7 +++++--
 .../scala/org/apache/spark/sql/hive/TestHive.scala   | 15 ++++++++++++---
 6 files changed, 30 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
index 7257d17..a21410f 100644
--- a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.{ListBuffer, Queue}
 import org.apache.spark.SparkConf
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 case class Person(name: String, age: Int)
 
@@ -34,7 +34,7 @@ object SparkSqlExample {
       case None => new SparkConf().setAppName("Simple Sql App")
     }
     val sc = new SparkContext(conf)
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
 
     import hiveContext._
     hql("DROP TABLE IF EXISTS src")

http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/docs/sql-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 156e0ae..a047d32 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -487,9 +487,9 @@ Configuration of Hive is done by placing your `hive-site.xml` file in `conf/`.
 
 When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
 adds support for finding tables in the MetaStore and writing queries using HiveQL. Users who do
-not have an existing Hive deployment can also experiment with the `LocalHiveContext`,
-which is similar to `HiveContext`, but creates a local copy of the `metastore` and `warehouse`
-automatically.
+not have an existing Hive deployment can still create a `HiveContext`.  When not configured by
+hive-site.xml, the context automatically creates `metastore_db` and `warehouse` in the current
+directory.
 
 {% highlight scala %}
 // sc is an existing SparkContext.

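If those defaults are not appropriate, the locations can be overridden through Hive
configuration, either in hive-site.xml or programmatically.  A hypothetical sketch of the
programmatic route, assuming the set(...) conf setter used by TestHiveContext (last hunk
below) is accessible from user code; the path is a placeholder:

    // Store warehouse data under an explicit directory instead of ./warehouse.
    hiveContext.set("hive.metastore.warehouse.dir", "/tmp/my_warehouse")
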
http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 66a23fa..dc5290f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql.hive
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql._
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 object HiveFromSpark {
   case class Record(key: Int, value: String)
@@ -31,7 +31,7 @@ object HiveFromSpark {
     // A local hive context creates an instance of the Hive Metastore in process, storing
     // the warehouse data in the current directory.  This location can be overridden by
     // specifying a second parameter to the constructor.
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
     import hiveContext._
 
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")

http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/python/pyspark/sql.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 13f0ed4..9388ead 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 #
 
+import warnings
+
 from pyspark.rdd import RDD, PipelinedRDD
 from pyspark.serializers import BatchedSerializer, PickleSerializer
 
@@ -813,6 +815,10 @@ class LocalHiveContext(HiveContext):
     130091
     """
 
+    def __init__(self, sparkContext, sqlContext=None):
+        HiveContext.__init__(self, sparkContext, sqlContext)
+        warnings.warn("LocalHiveContext is deprecated.  Use HiveContext instead.", DeprecationWarning)
+
     def _get_hive_ctx(self):
         return self._jvm.LocalHiveContext(self._jsc.sc())
 

http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b413373..27b444d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -42,9 +42,12 @@ import org.apache.spark.sql.execution.{Command => PhysicalCommand}
 import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
 
 /**
- * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
- * created with data stored in ./metadata.  Warehouse data is stored in in ./warehouse.
+ * DEPRECATED: Use HiveContext instead.
  */
+@deprecated("""
+  Use HiveContext instead.  It will still create a local metastore if one is not specified.
+  However, note that the default directory is ./metastore_db, not ./metastore
+  """, "1.1")
 class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   lazy val metastorePath = new File("metastore").getCanonicalPath

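With the annotation in place, existing code still compiles; scalac just emits a deprecation
warning at each use site, roughly as below (hypothetical user code; the exact warning text
varies by compiler version):

    val hiveContext = new LocalHiveContext(sc)
    // warning: class LocalHiveContext is deprecated:
    //   Use HiveContext instead.  It will still create a local metastore ...
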
http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 9386008..c50e8c4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -53,15 +53,24 @@ object TestHive
  * hive metastore seems to lead to weird non-deterministic failures.  Therefore, the execution of
  * test cases that rely on TestHive must be serialized.
  */
-class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
+class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   self =>
 
   // By clearing the port we force Spark to pick a new one.  This allows us to rerun tests
   // without restarting the JVM.
   System.clearProperty("spark.hostPort")
 
-  override lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
-  override lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+  lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
+  lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+
+  /** Sets up the system initially or after a RESET command */
+  protected def configure() {
+    set("javax.jdo.option.ConnectionURL",
+      s"jdbc:derby:;databaseName=$metastorePath;create=true")
+    set("hive.metastore.warehouse.dir", warehousePath)
+  }
+
+  configure() // Must be called before initializing the catalog below.
 
   /** The location of the compiled hive distribution */
   lazy val hiveHome = envVarToFile("HIVE_HOME")
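
The same configure() pattern can be mirrored outside the test harness to give each run an
isolated metastore.  A sketch under the same assumption about the set(...) conf setter;
getTempFilePath above is test-internal, so java.nio.file is used here instead:

    import java.nio.file.Files
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext

    // Each call yields a HiveContext with its own metastore and warehouse,
    // so repeated runs do not share Derby state.
    def isolatedHiveContext(sc: SparkContext): HiveContext = {
      val base = Files.createTempDirectory("sparkHiveTest")
      val ctx = new HiveContext(sc)
      ctx.set("javax.jdo.option.ConnectionURL",
        s"jdbc:derby:;databaseName=$base/metastore_db;create=true")
      ctx.set("hive.metastore.warehouse.dir", s"$base/warehouse")
      ctx
    }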