You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/01/18 00:33:27 UTC
[spark] branch master updated: [SPARK-33730][PYTHON] Standardize
warning types
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 098f226 [SPARK-33730][PYTHON] Standardize warning types
098f226 is described below
commit 098f2268e4ad43dd9453ada91161ea428dd57d16
Author: zero323 <ms...@gmail.com>
AuthorDate: Mon Jan 18 09:32:55 2021 +0900
[SPARK-33730][PYTHON] Standardize warning types
### What changes were proposed in this pull request?
This PR:
- Adds a small hierarchy of warnings to be used in PySpark applications. These extend built-in classes and top level `PySparkWarning`.
- Replaces `DeprecationWarnings` (intended for developers) with PySpark specific subclasses of `FutureWarning` (intended for end users).
### Why are the changes needed?
- To be more precise and give users additional control (in addition to standard module level filters) over PySpark warnings handling.
- Correct semantics (at the moment we use `DeprecationWarning` in user-facing API, but it is intended "for warnings about deprecated features when those warnings are intended for other Python developers").
### Does this PR introduce _any_ user-facing change?
Yes. Code can raise a different type of warning than before.
### How was this patch tested?
Existing tests.
Closes #30985 from zero323/SPARK-33730.
Authored-by: zero323 <ms...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
python/pyspark/ml/clustering.py | 2 +-
python/pyspark/mllib/classification.py | 2 +-
python/pyspark/mllib/regression.py | 7 ++++---
python/pyspark/rdd.py | 10 ++++++----
python/pyspark/sql/catalog.py | 6 ++++--
python/pyspark/sql/column.py | 6 ++++--
python/pyspark/sql/context.py | 15 ++++++++++-----
python/pyspark/sql/dataframe.py | 4 +++-
python/pyspark/sql/functions.py | 6 +++---
python/pyspark/worker.py | 10 ++++++++--
10 files changed, 44 insertions(+), 24 deletions(-)
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 54c1a43..60726cb 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -821,7 +821,7 @@ class BisectingKMeansModel(JavaModel, _BisectingKMeansParams, JavaMLWritable, Ja
"""
warnings.warn("Deprecated in 3.0.0. It will be removed in future versions. Use "
"ClusteringEvaluator instead. You can also get the cost on the training "
- "dataset in the summary.", DeprecationWarning)
+ "dataset in the summary.", FutureWarning)
return self._call_java("computeCost", dataset)
@property
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index bd43e91..5705401 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -324,7 +324,7 @@ class LogisticRegressionWithSGD(object):
"""
warnings.warn(
"Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
- "LogisticRegressionWithLBFGS.", DeprecationWarning)
+ "LogisticRegressionWithLBFGS.", FutureWarning)
def train(rdd, i):
return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index c224e38..3908e4a 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -299,7 +299,7 @@ class LinearRegressionWithSGD(object):
(default: 0.001)
"""
warnings.warn(
- "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning)
+ "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", FutureWarning)
def train(rdd, i):
return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -453,7 +453,8 @@ class LassoWithSGD(object):
warnings.warn(
"Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
"Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.",
- DeprecationWarning)
+ FutureWarning
+ )
def train(rdd, i):
return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -607,7 +608,7 @@ class RidgeRegressionWithSGD(object):
warnings.warn(
"Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
"Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
- "LinearRegression.", DeprecationWarning)
+ "LinearRegression.", FutureWarning)
def train(rdd, i):
return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 34faaac..3fed2bc 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -48,7 +48,6 @@ from pyspark.shuffle import Aggregator, ExternalMerger, \
from pyspark.traceback_utils import SCCallSiteSync
from pyspark.util import fail_on_stopiteration, _parse_memory
-
__all__ = ["RDD"]
@@ -448,8 +447,10 @@ class RDD(object):
>>> rdd.mapPartitionsWithSplit(f).sum()
6
"""
- warnings.warn("mapPartitionsWithSplit is deprecated; "
- "use mapPartitionsWithIndex instead", DeprecationWarning, stacklevel=2)
+ warnings.warn(
+ "mapPartitionsWithSplit is deprecated; use mapPartitionsWithIndex instead",
+ FutureWarning, stacklevel=2
+ )
return self.mapPartitionsWithIndex(f, preservesPartitioning)
def getNumPartitions(self):
@@ -960,7 +961,8 @@ class RDD(object):
warnings.warn(
"Deprecated in 3.1, Use pyspark.InheritableThread with "
"the pinned thread mode enabled.",
- DeprecationWarning)
+ FutureWarning
+ )
with SCCallSiteSync(self.context) as css:
sock_info = self.ctx._jvm.PythonRDD.collectAndServeWithJobGroup(
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 70d68a0..d4b7d4e 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -153,7 +153,8 @@ class Catalog(object):
"""
warnings.warn(
"createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self.createTable(tableName, path, source, schema, **options)
def createTable(
@@ -251,7 +252,8 @@ class Catalog(object):
"""
warnings.warn(
"Deprecated in 2.3.0. Use spark.udf.register instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self._sparkSession.udf.register(name, f, returnType)
@since(2.0)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 7608054..391ee5e 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -323,7 +323,8 @@ class Column(object):
"A column as 'key' in getItem is deprecated as of Spark 3.0, and will not "
"be supported in the future release. Use `column[key]` or `column.key` syntax "
"instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self[key]
def getField(self, name):
@@ -354,7 +355,8 @@ class Column(object):
"A column as 'name' in getField is deprecated as of Spark 3.0, and will not "
"be supported in the future release. Use `column[name]` or `column.name` syntax "
"instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self[name]
def withField(self, fieldName, col):
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 416bbde..ade82da 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -76,7 +76,8 @@ class SQLContext(object):
if sparkSession is None:
warnings.warn(
"Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
- DeprecationWarning)
+ FutureWarning
+ )
self._sc = sparkContext
self._jsc = self._sc._jsc
@@ -123,7 +124,8 @@ class SQLContext(object):
"""
warnings.warn(
"Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
- DeprecationWarning)
+ FutureWarning
+ )
if (cls._instantiatedContext is None
or SQLContext._instantiatedContext._sc._jsc is None):
@@ -229,7 +231,8 @@ class SQLContext(object):
"""
warnings.warn(
"Deprecated in 2.3.0. Use spark.udf.register instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self.sparkSession.udf.register(name, f, returnType)
def registerJavaFunction(self, name, javaClassName, returnType=None):
@@ -243,7 +246,8 @@ class SQLContext(object):
"""
warnings.warn(
"Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.",
- DeprecationWarning)
+ FutureWarning
+ )
return self.sparkSession.udf.registerJavaFunction(name, javaClassName, returnType)
# TODO(andrew): delete this once we refactor things to take in SparkSession
@@ -597,7 +601,8 @@ class HiveContext(SQLContext):
warnings.warn(
"HiveContext is deprecated in Spark 2.0.0. Please use " +
"SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
- DeprecationWarning)
+ FutureWarning
+ )
if jhiveContext is None:
sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index fe7d26d..e09e87c6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -135,7 +135,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
>>> spark.catalog.dropTempView("people")
"""
warnings.warn(
- "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning)
+ "Deprecated in 2.0, use createOrReplaceTempView instead.",
+ FutureWarning
+ )
self._jdf.createOrReplaceTempView(name)
def createTempView(self, name):
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index c9d24dc..45dbedf 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -476,7 +476,7 @@ def toDegrees(col):
.. deprecated:: 2.1.0
Use :func:`degrees` instead.
"""
- warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning)
+ warnings.warn("Deprecated in 2.1, use degrees instead.", FutureWarning)
return degrees(col)
@@ -486,7 +486,7 @@ def toRadians(col):
.. deprecated:: 2.1.0
Use :func:`radians` instead.
"""
- warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning)
+ warnings.warn("Deprecated in 2.1, use radians instead.", FutureWarning)
return radians(col)
@@ -795,7 +795,7 @@ def approxCountDistinct(col, rsd=None):
.. deprecated:: 2.1.0
Use :func:`approx_count_distinct` instead.
"""
- warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning)
+ warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", FutureWarning)
return approx_count_distinct(col, rsd)
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 8ca4bb3..3808bc3 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -21,7 +21,7 @@ Worker that receives input from Piped RDD.
import os
import sys
import time
-from inspect import getfullargspec
+from inspect import currentframe, getframeinfo, getfullargspec
import importlib
# 'resource' is a Unix specific module.
has_resource_module = True
@@ -30,6 +30,7 @@ try:
except ImportError:
has_resource_module = False
import traceback
+import warnings
from pyspark.accumulators import _accumulatorRegistry
from pyspark.broadcast import Broadcast, _broadcastRegistry
@@ -500,7 +501,12 @@ def main(infile, outfile):
except (resource.error, OSError, ValueError) as e:
# not all systems support resource limits, so warn instead of failing
- print("WARN: Failed to set memory limit: {0}\n".format(e), file=sys.stderr)
+ print(warnings.formatwarning(
+ "Failed to set memory limit: {0}".format(e),
+ ResourceWarning,
+ __file__,
+ getframeinfo(currentframe()).lineno
+ ), file=sys.stderr)
# initialize global state
taskContext = None
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org