Posted to commits@spark.apache.org by ue...@apache.org on 2021/10/04 21:33:08 UTC

[spark] branch master updated: [SPARK-36906][PYTHON] Inline type hints for conf.py and observation.py in python/pyspark/sql

This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8181260  [SPARK-36906][PYTHON] Inline type hints for conf.py and observation.py in python/pyspark/sql
8181260 is described below

commit 81812606cc32a41863c86695c4710b0e914b7e3c
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Mon Oct 4 14:32:25 2021 -0700

    [SPARK-36906][PYTHON] Inline type hints for conf.py and observation.py in python/pyspark/sql
    
    ### What changes were proposed in this pull request?
    Inline type hints for conf.py and observation.py in python/pyspark/sql.
    
    ### Why are the changes needed?
    Currently, type hint stub files (*.pyi) show the expected types for functions, but we can also take advantage of static type checking within the functions themselves by inlining the type hints.
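
    As a minimal, simplified sketch of the difference (not the exact Spark code; `_NoValue` is replaced by `None` here for brevity):

    ```python
    from typing import Optional

    # Before: the implementation carried no annotations; a parallel stub file
    # (conf.pyi) declared them, e.g.
    #     def get(self, key: str, default: Optional[str] = ...) -> str: ...
    # so static checkers saw the signature but never checked the function body.

    # After: the same hints are written inline on the implementation itself,
    # so mypy can check the body as well and the stub file can be removed.
    class RuntimeConfig:
        def get(self, key: str, default: Optional[str] = None) -> str:
            return "value" if default is None else default
    ```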
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    It also includes a documentation typo fix:
    `Metrics are aggregation expressions, which are applied to the DataFrame while **is** is being`
    is changed to
    `Metrics are aggregation expressions, which are applied to the DataFrame while **it** is being`
    
    ### How was this patch tested?
    Existing tests.
    
    Closes #34159 from xinrong-databricks/inline_conf_observation.
    
    Authored-by: Xinrong Meng <xi...@databricks.com>
    Signed-off-by: Takuya UESHIN <ue...@databricks.com>
---
 python/pyspark/__init__.pyi        |  4 ++--
 python/pyspark/sql/conf.py         | 19 +++++++++++--------
 python/pyspark/sql/conf.pyi        | 27 ---------------------------
 python/pyspark/sql/observation.py  | 24 ++++++++++++++----------
 python/pyspark/sql/observation.pyi | 27 ---------------------------
 5 files changed, 27 insertions(+), 74 deletions(-)

diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi
index 07cbccb..f85319b 100644
--- a/python/pyspark/__init__.pyi
+++ b/python/pyspark/__init__.pyi
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union
 
 from pyspark.accumulators import (  # noqa: F401
     Accumulator as Accumulator,
@@ -67,7 +67,7 @@ from pyspark.sql import (  # noqa: F401
 T = TypeVar("T")
 F = TypeVar("F", bound=Callable)
 
-def since(version: str) -> Callable[[T], T]: ...
+def since(version: Union[str, float]) -> Callable[[T], T]: ...
 def copy_func(
     f: F,
     name: Optional[str] = ...,
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index eab084a..54ae6fb 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -16,8 +16,11 @@
 #
 
 import sys
+from typing import Any, Optional
 
-from pyspark import since, _NoValue
+from py4j.java_gateway import JavaObject
+
+from pyspark import since, _NoValue  # type: ignore[attr-defined]
 
 
 class RuntimeConfig(object):
@@ -26,17 +29,17 @@ class RuntimeConfig(object):
     Options set here are automatically propagated to the Hadoop configuration during I/O.
     """
 
-    def __init__(self, jconf):
+    def __init__(self, jconf: JavaObject) -> None:
         """Create a new RuntimeConfig that wraps the underlying JVM object."""
         self._jconf = jconf
 
     @since(2.0)
-    def set(self, key, value):
+    def set(self, key: str, value: str) -> None:
         """Sets the given Spark runtime configuration property."""
         self._jconf.set(key, value)
 
     @since(2.0)
-    def get(self, key, default=_NoValue):
+    def get(self, key: str, default: Optional[str] = _NoValue) -> str:
         """Returns the value of Spark runtime configuration property for the given key,
         assuming it is set.
         """
@@ -49,25 +52,25 @@ class RuntimeConfig(object):
             return self._jconf.get(key, default)
 
     @since(2.0)
-    def unset(self, key):
+    def unset(self, key: str) -> None:
         """Resets the configuration property for the given key."""
         self._jconf.unset(key)
 
-    def _checkType(self, obj, identifier):
+    def _checkType(self, obj: Any, identifier: str) -> None:
         """Assert that an object is of type str."""
         if not isinstance(obj, str):
             raise TypeError("expected %s '%s' to be a string (was '%s')" %
                             (identifier, obj, type(obj).__name__))
 
     @since(2.4)
-    def isModifiable(self, key):
+    def isModifiable(self, key: str) -> bool:
         """Indicates whether the configuration property with the given key
         is modifiable in the current session.
         """
         return self._jconf.isModifiable(key)
 
 
-def _test():
+def _test() -> None:
     import os
     import doctest
     from pyspark.sql.session import SparkSession
diff --git a/python/pyspark/sql/conf.pyi b/python/pyspark/sql/conf.pyi
deleted file mode 100644
index 3e88f84..0000000
--- a/python/pyspark/sql/conf.pyi
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Optional
-from py4j.java_gateway import JavaObject  # type: ignore[import]
-
-class RuntimeConfig:
-    def __init__(self, jconf: JavaObject) -> None: ...
-    def set(self, key: str, value: str) -> None: ...
-    def get(self, key: str, default: Optional[str] = ...) -> str: ...
-    def unset(self, key: str) -> None: ...
-    def isModifiable(self, key: str) -> bool: ...
diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py
index 55c0922..3e8a0d1 100644
--- a/python/pyspark/sql/observation.py
+++ b/python/pyspark/sql/observation.py
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import Any, Dict, Optional
+
 from pyspark.sql import column
 from pyspark.sql.column import Column
 from pyspark.sql.dataframe import DataFrame
@@ -24,7 +26,7 @@ __all__ = ["Observation"]
 class Observation:
     """Class to observe (named) metrics on a :class:`DataFrame`.
 
-    Metrics are aggregation expressions, which are applied to the DataFrame while is is being
+    Metrics are aggregation expressions, which are applied to the DataFrame while it is being
     processed by an action.
 
     The metrics have the following guarantees:
@@ -61,7 +63,7 @@ class Observation:
     >>> observation.get
     {'count': 2, 'max(age)': 5}
     """
-    def __init__(self, name=None):
+    def __init__(self, name: Optional[str] = None) -> None:
         """Constructs a named or unnamed Observation instance.
 
         Parameters
@@ -78,7 +80,7 @@ class Observation:
         self._jvm = None
         self._jo = None
 
-    def _on(self, df, *exprs):
+    def _on(self, df: DataFrame, *exprs: Column) -> DataFrame:
         """Attaches this observation to the given :class:`DataFrame` to observe aggregations.
 
         Parameters
@@ -97,16 +99,18 @@ class Observation:
         assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column"
         assert self._jo is None, "an Observation can be used with a DataFrame only once"
 
-        self._jvm = df._sc._jvm
-        cls = self._jvm.org.apache.spark.sql.Observation
+        self._jvm = df._sc._jvm  # type: ignore[assignment]
+        cls = self._jvm.org.apache.spark.sql.Observation  # type: ignore[attr-defined]
         self._jo = cls(self._name) if self._name is not None else cls()
-        observed_df = self._jo.on(df._jdf,
-                                  exprs[0]._jc,
-                                  column._to_seq(df._sc, [c._jc for c in exprs[1:]]))
+        observed_df = self._jo.on(  # type: ignore[attr-defined]
+            df._jdf,
+            exprs[0]._jc,
+            column._to_seq(df._sc, [c._jc for c in exprs[1:]])  # type: ignore[attr-defined]
+        )
         return DataFrame(observed_df, df.sql_ctx)
 
     @property
-    def get(self):
+    def get(self) -> Dict[str, Any]:
         """Get the observed metrics.
 
         Waits until the observed dataset finishes its first action. Only the result of the
@@ -123,7 +127,7 @@ class Observation:
         return {k: v for k, v in jmap.items()}
 
 
-def _test():
+def _test() -> None:
     import doctest
     import sys
     from pyspark.context import SparkContext
diff --git a/python/pyspark/sql/observation.pyi b/python/pyspark/sql/observation.pyi
deleted file mode 100644
index 8709300..0000000
--- a/python/pyspark/sql/observation.pyi
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Optional, Dict, Any
-
-from py4j.java_gateway import JavaObject  # type: ignore[import]
-
-
-class Observation:
-    def __init__(self, name: Optional[str] = ...): ...
-    @property
-    def get(self) -> Dict[str, Any]: ...
