Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/02 06:35:45 UTC
spark git commit: [SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs
Repository: spark
Updated Branches:
refs/heads/master f825847c8 -> 8f91c697e
[SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs
## What changes were proposed in this pull request?
Use SQLConf in PySpark to manage all SQL configs, replacing the hard-coded config keys and default values with the typed getters on SQLConf.
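For example, an eager-evaluation setting that was previously read as a raw string can now go through a JVM SQLConf getter. A minimal sketch, assuming an active SparkSession named `spark`:

# Before: hard-coded key and default, parsed by hand on the Python side
eager = spark.conf.get("spark.sql.repl.eagerEval.enabled", "false").lower() == "true"

# After: typed getter on the JVM-side SQLConf, reached through Py4J
jconf = spark._jsparkSession.sessionState().conf()
eager = jconf.isReplEagerEvalEnabled()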
## How was this patch tested?
Existing unit tests.
Author: Yuanjian Li <xy...@gmail.com>
Closes #21648 from xuanyuanking/SPARK-24665.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f91c697
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f91c697
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f91c697
Branch: refs/heads/master
Commit: 8f91c697e251423b826cd6ac4ddd9e2dac15b96e
Parents: f825847
Author: Yuanjian Li <xy...@gmail.com>
Authored: Mon Jul 2 14:35:37 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Mon Jul 2 14:35:37 2018 +0800
----------------------------------------------------------------------
python/pyspark/sql/context.py | 5 +++
python/pyspark/sql/dataframe.py | 42 +++++---------------
.../org/apache/spark/sql/internal/SQLConf.scala | 6 +++
3 files changed, 21 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index e9ec7ba..9c094dd 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -93,6 +93,11 @@ class SQLContext(object):
         """
         return self._jsqlContext
 
+    @property
+    def _conf(self):
+        """Accessor for the JVM SQL-specific configurations"""
+        return self.sparkSession._jsparkSession.sessionState().conf()
+
     @classmethod
     @since(1.6)
     def getOrCreate(cls, sc):
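A rough usage sketch of the new accessor (assuming an existing SQLContext named `sqlContext`; `_conf` is internal API):

# Each getter crosses into the JVM over Py4J and returns a typed value,
# so the Python side no longer parses strings or repeats default values.
timezone = sqlContext._conf.sessionLocalTimeZone()    # str
truncate = sqlContext._conf.replEagerEvalTruncate()   # int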
http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index cb3fe44..c40aea9 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -354,32 +354,12 @@ class DataFrame(object):
         else:
             print(self._jdf.showString(n, int(truncate), vertical))
 
-    @property
-    def _eager_eval(self):
-        """Returns true if the eager evaluation enabled.
-        """
-        return self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.enabled", "false").lower() == "true"
-
-    @property
-    def _max_num_rows(self):
-        """Returns the max row number for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.maxNumRows", "20"))
-
-    @property
-    def _truncate(self):
-        """Returns the truncate length for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.truncate", "20"))
-
     def __repr__(self):
-        if not self._support_repr_html and self._eager_eval:
+        if not self._support_repr_html and self.sql_ctx._conf.isReplEagerEvalEnabled():
             vertical = False
             return self._jdf.showString(
-                self._max_num_rows, self._truncate, vertical)
+                self.sql_ctx._conf.replEagerEvalMaxNumRows(),
+                self.sql_ctx._conf.replEagerEvalTruncate(), vertical)
         else:
             return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
@@ -391,10 +371,10 @@ class DataFrame(object):
         import cgi
         if not self._support_repr_html:
             self._support_repr_html = True
-        if self._eager_eval:
-            max_num_rows = max(self._max_num_rows, 0)
+        if self.sql_ctx._conf.isReplEagerEvalEnabled():
+            max_num_rows = max(self.sql_ctx._conf.replEagerEvalMaxNumRows(), 0)
             sock_info = self._jdf.getRowsToPython(
-                max_num_rows, self._truncate)
+                max_num_rows, self.sql_ctx._conf.replEagerEvalTruncate())
             rows = list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
             head = rows[0]
             row_data = rows[1:]
@@ -2049,13 +2029,12 @@ class DataFrame(object):
         import pandas as pd
 
-        if self.sql_ctx.getConf("spark.sql.execution.pandas.respectSessionTimeZone").lower() \
-           == "true":
-            timezone = self.sql_ctx.getConf("spark.sql.session.timeZone")
+        if self.sql_ctx._conf.pandasRespectSessionTimeZone():
+            timezone = self.sql_ctx._conf.sessionLocalTimeZone()
         else:
             timezone = None
 
-        if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", "false").lower() == "true":
+        if self.sql_ctx._conf.arrowEnabled():
             use_arrow = True
             try:
                 from pyspark.sql.types import to_arrow_schema
@@ -2065,8 +2044,7 @@ class DataFrame(object):
                 to_arrow_schema(self.schema)
             except Exception as e:
-                if self.sql_ctx.getConf("spark.sql.execution.arrow.fallback.enabled", "true") \
-                        .lower() == "true":
+                if self.sql_ctx._conf.arrowFallbackEnabled():
                     msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.enabled' is set to true; however, "
http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index da1c34c..e2c48e2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1728,6 +1728,12 @@ class SQLConf extends Serializable with Logging {
 
   def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
 
+  def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
+
+  def replEagerEvalMaxNumRows: Int = getConf(SQLConf.REPL_EAGER_EVAL_MAX_NUM_ROWS)
+
+  def replEagerEvalTruncate: Int = getConf(SQLConf.REPL_EAGER_EVAL_TRUNCATE)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */
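These Scala getters are what the PySpark `_conf` accessor resolves to over Py4J; their defaults match the literals removed from dataframe.py. A sketch, assuming an active SparkSession named `spark`:

jconf = spark._jsparkSession.sessionState().conf()
jconf.isReplEagerEvalEnabled()   # False by default
jconf.replEagerEvalMaxNumRows()  # 20 by default
jconf.replEagerEvalTruncate()    # 20 by default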