You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by td...@apache.org on 2015/05/21 23:08:25 UTC
spark git commit: [SPARK-7478] [SQL] Added SQLContext.getOrCreate
Repository: spark
Updated Branches:
refs/heads/master 30f3f556f -> 3d0cccc85
[SPARK-7478] [SQL] Added SQLContext.getOrCreate
Having a SQLContext singleton would make it easier for applications to use a lazily instantiated single shared instance of SQLContext when needed. It would avoid problems like
1. In REPL/notebook environment, rerunning the line {{val sqlContext = new SQLContext}} multiple times created different contexts while overriding the reference to previous context, leading to issues like registered temp tables going missing.
2. In Streaming, creating SQLContext directly leads to serialization/deserialization issues when attempting to recover from DStream checkpoints. See [SPARK-6770]. Also to get around this problem I had to suggest creating a singleton instance - https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
This can be solved by {{SQLContext.getOrCreate}} which get or creates a new singleton instance of SQLContext using either a given SparkContext or a given SparkConf.
rxin marmbrus
Author: Tathagata Das <ta...@gmail.com>
Closes #6006 from tdas/SPARK-7478 and squashes the following commits:
25f4da9 [Tathagata Das] Addressed comments.
79fe069 [Tathagata Das] Added comments.
c66ca76 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7478
48adb14 [Tathagata Das] Removed HiveContext.getOrCreate
bf8cf50 [Tathagata Das] Fix more bug
dec5594 [Tathagata Das] Fixed bug
b4e9721 [Tathagata Das] Remove unnecessary import
4ef513b [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7478
d3ea8e4 [Tathagata Das] Added HiveContext
83bc950 [Tathagata Das] Updated tests
f82ae81 [Tathagata Das] Fixed test
bc72868 [Tathagata Das] Added SQLContext.getOrCreate
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d0cccc8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d0cccc8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d0cccc8
Branch: refs/heads/master
Commit: 3d0cccc85850ca9c79f3e5ff7395bd04d212b063
Parents: 30f3f55
Author: Tathagata Das <ta...@gmail.com>
Authored: Thu May 21 14:08:20 2015 -0700
Committer: Tathagata Das <ta...@gmail.com>
Committed: Thu May 21 14:08:20 2015 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/SQLContext.scala | 47 ++++++++++++++++++-
.../org/apache/spark/sql/SQLContextSuite.scala | 49 ++++++++++++++++++++
2 files changed, 95 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3d0cccc8/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 304e958..1ea596d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql
import java.beans.Introspector
import java.util.Properties
+import java.util.concurrent.atomic.AtomicReference
import scala.collection.JavaConversions._
import scala.collection.immutable
@@ -1270,9 +1271,53 @@ class SQLContext(@transient val sparkContext: SparkContext)
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
- // End of eeprecated methods
+ // End of deprecated methods
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
+
+
+ // Register a succesfully instantiatd context to the singleton. This should be at the end of
+ // the class definition so that the singleton is updated only if there is no exception in the
+ // construction of the instance.
+ SQLContext.setLastInstantiatedContext(self)
}
+/**
+ * This SQLContext object contains utility functions to create a singleton SQLContext instance,
+ * or to get the last created SQLContext instance.
+ */
+object SQLContext {
+
+ private val INSTANTIATION_LOCK = new Object()
+
+ /**
+ * Reference to the last created SQLContext.
+ */
+ @transient private val lastInstantiatedContext = new AtomicReference[SQLContext]()
+
+ /**
+ * Get the singleton SQLContext if it exists or create a new one using the given SparkContext.
+ * This function can be used to create a singleton SQLContext object that can be shared across
+ * the JVM.
+ */
+ def getOrCreate(sparkContext: SparkContext): SQLContext = {
+ INSTANTIATION_LOCK.synchronized {
+ if (lastInstantiatedContext.get() == null) {
+ new SQLContext(sparkContext)
+ }
+ }
+ lastInstantiatedContext.get()
+ }
+
+ private[sql] def clearLastInstantiatedContext(): Unit = {
+ INSTANTIATION_LOCK.synchronized {
+ lastInstantiatedContext.set(null)
+ }
+ }
+ private[sql] def setLastInstantiatedContext(sqlContext: SQLContext): Unit = {
+ INSTANTIATION_LOCK.synchronized {
+ lastInstantiatedContext.set(sqlContext)
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/3d0cccc8/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
new file mode 100644
index 0000000..f186bc1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
@@ -0,0 +1,49 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.sql
+
+import org.scalatest.{BeforeAndAfterAll, FunSuite}
+
+import org.apache.spark.sql.test.TestSQLContext
+
+class SQLContextSuite extends FunSuite with BeforeAndAfterAll {
+
+ private val testSqlContext = TestSQLContext
+ private val testSparkContext = TestSQLContext.sparkContext
+
+ override def afterAll(): Unit = {
+ SQLContext.setLastInstantiatedContext(testSqlContext)
+ }
+
+ test("getOrCreate instantiates SQLContext") {
+ SQLContext.clearLastInstantiatedContext()
+ val sqlContext = SQLContext.getOrCreate(testSparkContext)
+ assert(sqlContext != null, "SQLContext.getOrCreate returned null")
+ assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext),
+ "SQLContext created by SQLContext.getOrCreate not returned by SQLContext.getOrCreate")
+ }
+
+ test("getOrCreate gets last explicitly instantiated SQLContext") {
+ SQLContext.clearLastInstantiatedContext()
+ val sqlContext = new SQLContext(testSparkContext)
+ assert(SQLContext.getOrCreate(testSparkContext) != null,
+ "SQLContext.getOrCreate after explicitly created SQLContext returned null")
+ assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext),
+ "SQLContext.getOrCreate after explicitly created SQLContext did not return the context")
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org