Posted to commits@spark.apache.org by sa...@apache.org on 2021/11/03 14:03:24 UTC
[spark] branch master updated: Revert "[SPARK-36894][SPARK-37077][PYTHON] Synchronize RDD.toDF annotations with SparkSession and SQLContext .createDataFrame variants."
This is an automated email from the ASF dual-hosted git repository.
sarutak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8687138 Revert "[SPARK-36894][SPARK-37077][PYTHON] Synchronize RDD.toDF annotations with SparkSession and SQLContext .createDataFrame variants."
8687138 is described below
commit 86871386b063d8f7a8b5b42eb327a3900525af58
Author: Kousuke Saruta <sa...@oss.nttdata.com>
AuthorDate: Wed Nov 3 23:01:38 2021 +0900
Revert "[SPARK-36894][SPARK-37077][PYTHON] Synchronize RDD.toDF annotations with SparkSession and SQLContext .createDataFrame variants."
This reverts commit 855da09f02f3007a2c36e7a738d4dc81fd95569a.
See [this comment](https://github.com/apache/spark/pull/34146#issuecomment-959136935).
Closes #34477 from sarutak/revert-SPARK-37077.
Authored-by: Kousuke Saruta <sa...@oss.nttdata.com>
Signed-off-by: Kousuke Saruta <sa...@oss.nttdata.com>
---
python/pyspark/rdd.pyi | 15 ++++-----------
python/pyspark/sql/_typing.pyi | 11 -----------
python/pyspark/sql/context.py | 38 ++++++++++++++++++++++----------------
python/pyspark/sql/session.py | 40 +++++++++++-----------------------------
4 files changed, 37 insertions(+), 67 deletions(-)
diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi
index 84481d3..a810a2c 100644
--- a/python/pyspark/rdd.pyi
+++ b/python/pyspark/rdd.pyi
@@ -55,8 +55,8 @@ from pyspark.resource.requests import ( # noqa: F401
from pyspark.resource.profile import ResourceProfile
from pyspark.statcounter import StatCounter
from pyspark.sql.dataframe import DataFrame
-from pyspark.sql.types import AtomicType, StructType
-from pyspark.sql._typing import AtomicValue, RowLike
+from pyspark.sql.types import StructType
+from pyspark.sql._typing import RowLike
from py4j.java_gateway import JavaObject # type: ignore[import]
T = TypeVar("T")
@@ -445,18 +445,11 @@ class RDD(Generic[T]):
@overload
def toDF(
self: RDD[RowLike],
- schema: Optional[Union[List[str], Tuple[str, ...]]] = ...,
+ schema: Optional[List[str]] = ...,
sampleRatio: Optional[float] = ...,
) -> DataFrame: ...
@overload
- def toDF(
- self: RDD[RowLike], schema: Optional[Union[StructType, str]] = ...
- ) -> DataFrame: ...
- @overload
- def toDF(
- self: RDD[AtomicValue],
- schema: Union[AtomicType, str],
- ) -> DataFrame: ...
+ def toDF(self: RDD[RowLike], schema: Optional[StructType] = ...) -> DataFrame: ...
class RDDBarrier(Generic[T]):
rdd: RDD[T]
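For context, the two restored toDF overloads above correspond to the following call shapes. A minimal sketch (illustrative only, not part of this commit; assumes a running local SparkSession):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, LongType

    spark = SparkSession.builder.getOrCreate()
    rdd = spark.sparkContext.parallelize([("Alice", 1), ("Bob", 2)])

    # Overload 1: optional column names (List[str]) plus optional sampleRatio
    df1 = rdd.toDF(["name", "age"])

    # Overload 2: an explicit StructType schema
    schema = StructType([
        StructField("name", StringType()),
        StructField("age", LongType()),
    ])
    df2 = rdd.toDF(schema)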
diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi
index b6b4606..1a3bd8f 100644
--- a/python/pyspark/sql/_typing.pyi
+++ b/python/pyspark/sql/_typing.pyi
@@ -42,17 +42,6 @@ AtomicDataTypeOrString = Union[pyspark.sql.types.AtomicType, str]
DataTypeOrString = Union[pyspark.sql.types.DataType, str]
OptionalPrimitiveType = Optional[PrimitiveType]
-AtomicValue = TypeVar(
- "AtomicValue",
- datetime.datetime,
- datetime.date,
- decimal.Decimal,
- bool,
- str,
- int,
- float,
-)
-
RowLike = TypeVar("RowLike", List[Any], Tuple[Any, ...], pyspark.sql.types.Row)
class SupportsOpen(Protocol):
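For context, RowLike (kept by this revert, while AtomicValue is removed) is a TypeVar over list, tuple, and Row, so each of the following element shapes type-checks as row-like data under these stubs (illustrative only, not part of this commit):

    from pyspark.sql import Row

    rows_as_lists = [["Alice", 1], ["Bob", 2]]    # List[Any]
    rows_as_tuples = [("Alice", 1), ("Bob", 2)]   # Tuple[Any, ...]
    rows_as_rows = [Row(name="Alice", age=1)]     # pyspark.sql.types.Row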
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index eba2087..7d27c55 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -48,11 +48,13 @@ from pyspark.conf import SparkConf
if TYPE_CHECKING:
from pyspark.sql._typing import (
- AtomicValue,
- RowLike,
UserDefinedFunctionLike,
+ RowLike,
+ DateTimeLiteral,
+ LiteralType,
+ DecimalLiteral
)
- from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike
+ from pyspark.sql.pandas._typing import DataFrameLike
__all__ = ["SQLContext", "HiveContext"]
@@ -321,8 +323,7 @@ class SQLContext(object):
@overload
def createDataFrame(
self,
- data: Union["RDD[RowLike]", Iterable["RowLike"]],
- schema: Union[List[str], Tuple[str, ...]] = ...,
+ data: Iterable["RowLike"],
samplingRatio: Optional[float] = ...,
) -> DataFrame:
...
@@ -330,9 +331,8 @@ class SQLContext(object):
@overload
def createDataFrame(
self,
- data: Union["RDD[RowLike]", Iterable["RowLike"]],
- schema: Union[StructType, str],
- *,
+ data: Iterable["RowLike"],
+ schema: Union[List[str], Tuple[str, ...]] = ...,
verifySchema: bool = ...,
) -> DataFrame:
...
@@ -340,10 +340,7 @@ class SQLContext(object):
@overload
def createDataFrame(
self,
- data: Union[
- "RDD[AtomicValue]",
- Iterable["AtomicValue"],
- ],
+ data: Iterable[Union["DateTimeLiteral", "LiteralType", "DecimalLiteral"]],
schema: Union[AtomicType, str],
verifySchema: bool = ...,
) -> DataFrame:
@@ -351,14 +348,23 @@ class SQLContext(object):
@overload
def createDataFrame(
- self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ...
+ self,
+ data: Iterable["RowLike"],
+ schema: Union[StructType, str],
+ verifySchema: bool = ...,
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self, data: "DataFrameLike", samplingRatio: Optional[float] = ...
) -> DataFrame:
...
@overload
def createDataFrame(
self,
- data: "PandasDataFrameLike",
+ data: "DataFrameLike",
schema: Union[StructType, str],
verifySchema: bool = ...,
) -> DataFrame:
@@ -366,8 +372,8 @@ class SQLContext(object):
def createDataFrame( # type: ignore[misc]
self,
- data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
- schema: Optional[Union[AtomicType, StructType, str]] = None,
+ data: Iterable["RowLike"],
+ schema: Optional[Union[List[str], Tuple[str, ...]]] = None,
samplingRatio: Optional[float] = None,
verifySchema: bool = True
) -> DataFrame:
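For context, the restored SQLContext.createDataFrame overloads above distinguish row-like input with column names, atomic values with an AtomicType schema, row-like input with a StructType, and pandas input. A minimal sketch of the first two shapes (illustrative only, not part of this commit; SQLContext is the legacy entry point and its direct construction is deprecated in Spark 3.x):

    from pyspark.sql import SparkSession, SQLContext
    from pyspark.sql.types import IntegerType

    spark = SparkSession.builder.getOrCreate()
    sqlContext = SQLContext(spark.sparkContext, spark)  # deprecated; prefer SparkSession

    # Row-like data with column names (samplingRatio optional)
    df1 = sqlContext.createDataFrame([("Alice", 1)], ["name", "age"])

    # Atomic values with an AtomicType schema (yields a single column named "value")
    df2 = sqlContext.createDataFrame([1, 2, 3], IntegerType())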
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index de0f9e3..728d658 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -42,7 +42,7 @@ from pyspark.sql.types import (
from pyspark.sql.utils import install_exception_handler, is_timestamp_ntz_preferred
if TYPE_CHECKING:
- from pyspark.sql._typing import AtomicValue, RowLike
+ from pyspark.sql._typing import DateTimeLiteral, LiteralType, DecimalLiteral, RowLike
from pyspark.sql.catalog import Catalog
from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike
from pyspark.sql.streaming import StreamingQueryManager
@@ -628,8 +628,7 @@ class SparkSession(SparkConversionMixin):
@overload
def createDataFrame(
self,
- data: Iterable["RowLike"],
- schema: Union[List[str], Tuple[str, ...]] = ...,
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
samplingRatio: Optional[float] = ...,
) -> DataFrame:
...
@@ -637,36 +636,19 @@ class SparkSession(SparkConversionMixin):
@overload
def createDataFrame(
self,
- data: "RDD[RowLike]",
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
schema: Union[List[str], Tuple[str, ...]] = ...,
- samplingRatio: Optional[float] = ...,
- ) -> DataFrame:
- ...
-
- @overload
- def createDataFrame(
- self,
- data: Iterable["RowLike"],
- schema: Union[StructType, str],
- *,
verifySchema: bool = ...,
) -> DataFrame:
...
@overload
def createDataFrame(
- self,
- data: "RDD[RowLike]",
- schema: Union[StructType, str],
- *,
- verifySchema: bool = ...,
- ) -> DataFrame:
- ...
-
- @overload
- def createDataFrame(
self,
- data: "RDD[AtomicValue]",
+ data: Union[
+ "RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]]",
+ Iterable[Union["DateTimeLiteral", "LiteralType", "DecimalLiteral"]],
+ ],
schema: Union[AtomicType, str],
verifySchema: bool = ...,
) -> DataFrame:
@@ -674,10 +656,10 @@ class SparkSession(SparkConversionMixin):
@overload
def createDataFrame(
- self,
- data: Iterable["AtomicValue"],
- schema: Union[AtomicType, str],
- verifySchema: bool = ...,
+ self,
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
+ schema: Union[StructType, str],
+ verifySchema: bool = ...,
) -> DataFrame:
...
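For context, the restored SparkSession.createDataFrame overloads accept both RDDs and plain iterables of row-like data, as well as atomic values paired with an AtomicType schema. A minimal sketch (illustrative only, not part of this commit; assumes a running local SparkSession):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import StringType

    spark = SparkSession.builder.getOrCreate()

    # RDD[RowLike] with column names
    rdd = spark.sparkContext.parallelize([("Alice", 1), ("Bob", 2)])
    df1 = spark.createDataFrame(rdd, ["name", "age"])

    # Iterable[RowLike] with a DDL-string schema (Union[StructType, str])
    df2 = spark.createDataFrame([("Alice", 1)], "name string, age int")

    # Atomic values with an AtomicType schema
    df3 = spark.createDataFrame(spark.sparkContext.parallelize(["a", "b"]), StringType())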