You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/09/27 01:51:28 UTC
spark git commit: [SPARK-25540][SQL][PYSPARK] Make HiveContext in
PySpark behave the same as in Scala.
Repository: spark
Updated Branches:
refs/heads/master d0990e3df -> c3c45cbd7
[SPARK-25540][SQL][PYSPARK] Make HiveContext in PySpark behave the same as in Scala.
## What changes were proposed in this pull request?
In Scala, `HiveContext` sets the config `spark.sql.catalogImplementation` on the given `SparkContext` and then passes it to `SparkSession.builder`.
The `HiveContext` in PySpark should behave the same as in Scala.
## How was this patch tested?
Existing tests.
Closes #22552 from ueshin/issues/SPARK-25540/hive_context.
Authored-by: Takuya UESHIN <ue...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3c45cbd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3c45cbd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3c45cbd
Branch: refs/heads/master
Commit: c3c45cbd76d91d591d98cf8411fcfd30079f5969
Parents: d0990e3
Author: Takuya UESHIN <ue...@databricks.com>
Authored: Thu Sep 27 09:51:20 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Sep 27 09:51:20 2018 +0800
----------------------------------------------------------------------
python/pyspark/sql/context.py | 3 ++-
python/pyspark/sql/session.py | 19 ++++++++++++++-----
2 files changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c3c45cbd/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 9c094dd..1938965 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -485,7 +485,8 @@ class HiveContext(SQLContext):
"SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
DeprecationWarning)
if jhiveContext is None:
- sparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
+ sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
+ sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
else:
sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession())
SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext)
http://git-wip-us.apache.org/repos/asf/spark/blob/c3c45cbd/python/pyspark/sql/session.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 51a38eb..a5e2872 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -83,6 +83,7 @@ class SparkSession(object):
_lock = RLock()
_options = {}
+ _sc = None
@since(2.0)
def config(self, key=None, value=None, conf=None):
@@ -139,6 +140,11 @@ class SparkSession(object):
"""
return self.config("spark.sql.catalogImplementation", "hive")
+ def _sparkContext(self, sc):
+ with self._lock:
+ self._sc = sc
+ return self
+
@since(2.0)
def getOrCreate(self):
"""Gets an existing :class:`SparkSession` or, if there is no existing one, creates a
@@ -167,11 +173,14 @@ class SparkSession(object):
from pyspark.conf import SparkConf
session = SparkSession._instantiatedSession
if session is None or session._sc._jsc is None:
- sparkConf = SparkConf()
- for key, value in self._options.items():
- sparkConf.set(key, value)
- sc = SparkContext.getOrCreate(sparkConf)
- # This SparkContext may be an existing one.
+ if self._sc is not None:
+ sc = self._sc
+ else:
+ sparkConf = SparkConf()
+ for key, value in self._options.items():
+ sparkConf.set(key, value)
+ sc = SparkContext.getOrCreate(sparkConf)
+ # This SparkContext may be an existing one.
for key, value in self._options.items():
# we need to propagate the confs
# before we create the SparkSession. Otherwise, confs like
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org