You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/06/21 19:57:07 UTC
[spark] branch master updated: [SPARK-44133][PYTHON] Upgrade MyPy from 0.920 to 0.982
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 10751dc285c [SPARK-44133][PYTHON] Upgrade MyPy from 0.920 to 0.982
10751dc285c is described below
commit 10751dc285c5c639e3343a8abc26857407522822
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Wed Jun 21 12:56:57 2023 -0700
[SPARK-44133][PYTHON] Upgrade MyPy from 0.920 to 0.982
### What changes were proposed in this pull request?
This PR upgrades the MyPy version from 0.920 to 0.982.
### Why are the changes needed?
To better detect type-related issues through static analysis.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
```bash
./dev/lint-python
```
Closes #41690 from HyukjinKwon/SPARK-44133.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.github/workflows/build_and_test.yml | 2 +-
dev/requirements.txt | 2 +-
python/pyspark/ml/base.py | 2 +-
python/pyspark/ml/classification.py | 84 +++++++++----------
python/pyspark/ml/clustering.py | 36 ++++----
python/pyspark/ml/connect/base.py | 2 +-
python/pyspark/ml/connect/classification.py | 2 +-
python/pyspark/ml/feature.py | 44 +++++-----
python/pyspark/ml/fpm.py | 4 +-
python/pyspark/ml/recommendation.py | 6 +-
python/pyspark/ml/regression.py | 96 +++++++++++-----------
.../pyspark/ml/tests/typing/test_clustering.yaml | 6 +-
python/pyspark/ml/tests/typing/test_evaluation.yml | 6 +-
python/pyspark/ml/torch/distributor.py | 6 +-
python/pyspark/ml/tree.py | 16 ++--
python/pyspark/ml/tuning.py | 2 +-
python/pyspark/ml/util.py | 4 +-
python/pyspark/ml/wrapper.py | 4 +-
python/pyspark/mllib/classification.py | 6 +-
python/pyspark/mllib/clustering.py | 18 ++--
python/pyspark/mllib/evaluation.py | 38 ++++-----
python/pyspark/mllib/feature.py | 8 +-
python/pyspark/mllib/linalg/__init__.py | 4 +-
python/pyspark/mllib/linalg/distributed.py | 6 +-
python/pyspark/mllib/recommendation.py | 2 +-
python/pyspark/mllib/regression.py | 4 +-
python/pyspark/sql/observation.py | 2 +-
python/pyspark/sql/tests/typing/test_dataframe.yml | 4 +-
python/pyspark/sql/tests/typing/test_functions.yml | 32 ++++----
python/pyspark/sql/tests/typing/test_session.yml | 7 +-
python/pyspark/sql/types.py | 2 +-
python/pyspark/streaming/context.py | 4 +-
python/pyspark/tests/typing/test_rdd.yml | 4 +-
33 files changed, 235 insertions(+), 230 deletions(-)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index a03aa53dc88..47732a5c9f6 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -624,7 +624,7 @@ jobs:
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
# Jinja2 3.0.0+ causes error when building with Sphinx.
# See also https://issues.apache.org/jira/browse/SPARK-35375.
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+ python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Python linter
run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 1af7256e0b3..72da5dbe163 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -20,7 +20,7 @@ openpyxl
coverage
# Linter
-mypy==0.920
+mypy==0.982
pytest-mypy-plugins==1.9.3
flake8==3.9.0
# See SPARK-38680.
diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py
index 34c3aa9c62c..b94358d26fd 100644
--- a/python/pyspark/ml/base.py
+++ b/python/pyspark/ml/base.py
@@ -396,7 +396,7 @@ class PredictionModel(Model, _PredictorParams, Generic[T], metaclass=ABCMeta):
"""
return self._set(predictionCol=value)
- @property # type: ignore[misc]
+ @property
@abstractmethod
@since("2.1.0")
def numFeatures(self) -> int:
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index c09a510d76b..81d7a1d51da 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -180,7 +180,7 @@ class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=ABCMeta)
"""
return self._set(rawPredictionCol=value)
- @property # type: ignore[misc]
+ @property
@abstractmethod
@since("2.1.0")
def numClasses(self) -> int:
@@ -284,7 +284,7 @@ class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]):
To be mixed in with :class:`pyspark.ml.JavaModel`
"""
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def numClasses(self) -> int:
"""
@@ -335,7 +335,7 @@ class _ClassificationSummary(JavaWrapper):
.. versionadded:: 3.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def predictions(self) -> DataFrame:
"""
@@ -343,7 +343,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("predictions")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def predictionCol(self) -> str:
"""
@@ -351,7 +351,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("predictionCol")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def labelCol(self) -> str:
"""
@@ -360,7 +360,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("labelCol")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def weightCol(self) -> str:
"""
@@ -386,7 +386,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("labels")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def truePositiveRateByLabel(self) -> List[float]:
"""
@@ -394,7 +394,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("truePositiveRateByLabel")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def falsePositiveRateByLabel(self) -> List[float]:
"""
@@ -402,7 +402,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("falsePositiveRateByLabel")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def precisionByLabel(self) -> List[float]:
"""
@@ -410,7 +410,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("precisionByLabel")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def recallByLabel(self) -> List[float]:
"""
@@ -425,7 +425,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("fMeasureByLabel", beta)
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def accuracy(self) -> float:
"""
@@ -435,7 +435,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("accuracy")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def weightedTruePositiveRate(self) -> float:
"""
@@ -444,7 +444,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("weightedTruePositiveRate")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def weightedFalsePositiveRate(self) -> float:
"""
@@ -452,7 +452,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("weightedFalsePositiveRate")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def weightedRecall(self) -> float:
"""
@@ -461,7 +461,7 @@ class _ClassificationSummary(JavaWrapper):
"""
return self._call_java("weightedRecall")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def weightedPrecision(self) -> float:
"""
@@ -485,7 +485,7 @@ class _TrainingSummary(JavaWrapper):
.. versionadded:: 3.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def objectiveHistory(self) -> List[float]:
"""
@@ -495,7 +495,7 @@ class _TrainingSummary(JavaWrapper):
"""
return self._call_java("objectiveHistory")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def totalIterations(self) -> int:
"""
@@ -512,7 +512,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
.. versionadded:: 3.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def scoreCol(self) -> str:
"""
@@ -536,7 +536,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
"""
return self._call_java("roc")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def areaUnderROC(self) -> float:
"""
@@ -545,7 +545,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
"""
return self._call_java("areaUnderROC")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def pr(self) -> DataFrame:
"""
@@ -555,7 +555,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
"""
return self._call_java("pr")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def fMeasureByThreshold(self) -> DataFrame:
"""
@@ -564,7 +564,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
"""
return self._call_java("fMeasureByThreshold")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def precisionByThreshold(self) -> DataFrame:
"""
@@ -574,7 +574,7 @@ class _BinaryClassificationSummary(_ClassificationSummary):
"""
return self._call_java("precisionByThreshold")
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def recallByThreshold(self) -> DataFrame:
"""
@@ -857,7 +857,7 @@ class LinearSVCModel(
"""
return self._set(threshold=value)
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def coefficients(self) -> Vector:
"""
@@ -865,7 +865,7 @@ class LinearSVCModel(
"""
return self._call_java("coefficients")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def intercept(self) -> float:
"""
@@ -1527,7 +1527,7 @@ class LogisticRegressionModel(
.. versionadded:: 1.3.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def coefficients(self) -> Vector:
"""
@@ -1536,7 +1536,7 @@ class LogisticRegressionModel(
"""
return self._call_java("coefficients")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def intercept(self) -> float:
"""
@@ -1545,7 +1545,7 @@ class LogisticRegressionModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def coefficientMatrix(self) -> Matrix:
"""
@@ -1553,7 +1553,7 @@ class LogisticRegressionModel(
"""
return self._call_java("coefficientMatrix")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def interceptVector(self) -> Vector:
"""
@@ -1561,7 +1561,7 @@ class LogisticRegressionModel(
"""
return self._call_java("interceptVector")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def summary(self) -> "LogisticRegressionTrainingSummary":
"""
@@ -1609,7 +1609,7 @@ class LogisticRegressionSummary(_ClassificationSummary):
.. versionadded:: 2.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def probabilityCol(self) -> str:
"""
@@ -1618,7 +1618,7 @@ class LogisticRegressionSummary(_ClassificationSummary):
"""
return self._call_java("probabilityCol")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def featuresCol(self) -> str:
"""
@@ -2279,13 +2279,13 @@ class RandomForestClassificationModel(
"""
return self._call_java("featureImportances")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def trees(self) -> List[DecisionTreeClassificationModel]:
"""Trees in this ensemble. Warning: These have null parent Estimators."""
return [DecisionTreeClassificationModel(m) for m in list(self._call_java("trees"))]
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def summary(self) -> "RandomForestClassificationTrainingSummary":
"""
@@ -2767,7 +2767,7 @@ class GBTClassificationModel(
"""
return self._call_java("featureImportances")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def trees(self) -> List[DecisionTreeRegressionModel]:
"""Trees in this ensemble. Warning: These have null parent Estimators."""
@@ -3018,7 +3018,7 @@ class NaiveBayesModel(
.. versionadded:: 1.5.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def pi(self) -> Vector:
"""
@@ -3026,7 +3026,7 @@ class NaiveBayesModel(
"""
return self._call_java("pi")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def theta(self) -> Matrix:
"""
@@ -3034,7 +3034,7 @@ class NaiveBayesModel(
"""
return self._call_java("theta")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def sigma(self) -> Matrix:
"""
@@ -3311,7 +3311,7 @@ class MultilayerPerceptronClassificationModel(
.. versionadded:: 1.6.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def weights(self) -> Vector:
"""
@@ -4227,7 +4227,7 @@ class FMClassificationModel(
.. versionadded:: 3.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def intercept(self) -> float:
"""
@@ -4235,7 +4235,7 @@ class FMClassificationModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def linear(self) -> Vector:
"""
@@ -4243,7 +4243,7 @@ class FMClassificationModel(
"""
return self._call_java("linear")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def factors(self) -> Matrix:
"""
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 19305749003..41108782a47 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -83,7 +83,7 @@ class ClusteringSummary(JavaWrapper):
.. versionadded:: 2.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def predictionCol(self) -> str:
"""
@@ -91,7 +91,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("predictionCol")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def predictions(self) -> DataFrame:
"""
@@ -99,7 +99,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("predictions")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def featuresCol(self) -> str:
"""
@@ -107,7 +107,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("featuresCol")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def k(self) -> int:
"""
@@ -115,7 +115,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("k")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def cluster(self) -> DataFrame:
"""
@@ -123,7 +123,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("cluster")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def clusterSizes(self) -> List[int]:
"""
@@ -131,7 +131,7 @@ class ClusteringSummary(JavaWrapper):
"""
return self._call_java("clusterSizes")
- @property # type: ignore[misc]
+ @property
@since("2.4.0")
def numIter(self) -> int:
"""
@@ -210,7 +210,7 @@ class GaussianMixtureModel(
"""
return self._set(probabilityCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def weights(self) -> List[float]:
"""
@@ -220,7 +220,7 @@ class GaussianMixtureModel(
"""
return self._call_java("weights")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def gaussians(self) -> List[MultivariateGaussian]:
"""
@@ -236,7 +236,7 @@ class GaussianMixtureModel(
for jgaussian in jgaussians
]
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def gaussiansDF(self) -> DataFrame:
"""
@@ -246,7 +246,7 @@ class GaussianMixtureModel(
"""
return self._call_java("gaussiansDF")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def summary(self) -> "GaussianMixtureSummary":
"""
@@ -529,7 +529,7 @@ class GaussianMixtureSummary(ClusteringSummary):
.. versionadded:: 2.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def probabilityCol(self) -> str:
"""
@@ -537,7 +537,7 @@ class GaussianMixtureSummary(ClusteringSummary):
"""
return self._call_java("probabilityCol")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def probability(self) -> DataFrame:
"""
@@ -545,7 +545,7 @@ class GaussianMixtureSummary(ClusteringSummary):
"""
return self._call_java("probability")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def logLikelihood(self) -> float:
"""
@@ -561,7 +561,7 @@ class KMeansSummary(ClusteringSummary):
.. versionadded:: 2.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.4.0")
def trainingCost(self) -> float:
"""
@@ -683,7 +683,7 @@ class KMeansModel(
"""Get the cluster centers, represented as a list of NumPy arrays."""
return [c.toArray() for c in self._call_java("clusterCenters")]
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def summary(self) -> KMeansSummary:
"""
@@ -1021,7 +1021,7 @@ class BisectingKMeansModel(
)
return self._call_java("computeCost", dataset)
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def summary(self) -> "BisectingKMeansSummary":
"""
@@ -1245,7 +1245,7 @@ class BisectingKMeansSummary(ClusteringSummary):
.. versionadded:: 2.1.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def trainingCost(self) -> float:
"""
diff --git a/python/pyspark/ml/connect/base.py b/python/pyspark/ml/connect/base.py
index 4f1f77ac627..f86b1e928c2 100644
--- a/python/pyspark/ml/connect/base.py
+++ b/python/pyspark/ml/connect/base.py
@@ -322,7 +322,7 @@ class PredictionModel(Model, _PredictorParams, metaclass=ABCMeta):
"""
return self._set(predictionCol=value)
- @property # type: ignore[misc]
+ @property
@abstractmethod
@since("3.5.0")
def numFeatures(self) -> int:
diff --git a/python/pyspark/ml/connect/classification.py b/python/pyspark/ml/connect/classification.py
index 8d2006c7af9..eaad09920c0 100644
--- a/python/pyspark/ml/connect/classification.py
+++ b/python/pyspark/ml/connect/classification.py
@@ -113,7 +113,7 @@ def _train_logistic_regression_model_worker_fn(
num_samples_per_worker,
batch_size,
num_workers=0,
- prefetch_factor=None,
+ prefetch_factor=None, # type: ignore
)
for i in range(max_iter):
ddp_model.train()
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index e7ec35bffa0..349b50913d7 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1219,7 +1219,7 @@ class CountVectorizerModel(
model._set(vocabSize=len(vocabulary))
return model
- @property # type: ignore[misc]
+ @property
@since("1.6.0")
def vocabulary(self) -> List[str]:
"""
@@ -1889,7 +1889,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def idf(self) -> Vector:
"""
@@ -1897,7 +1897,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable
"""
return self._call_java("idf")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def docFreq(self) -> List[int]:
"""
@@ -1905,7 +1905,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable
"""
return self._call_java("docFreq")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def numDocs(self) -> int:
"""
@@ -2255,7 +2255,7 @@ class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable["ImputerModel"], Ja
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def surrogateDF(self) -> DataFrame:
"""
@@ -2470,7 +2470,7 @@ class MaxAbsScalerModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def maxAbs(self) -> Vector:
"""
@@ -2820,7 +2820,7 @@ class MinMaxScalerModel(
"""
return self._set(max=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def originalMin(self) -> Vector:
"""
@@ -2828,7 +2828,7 @@ class MinMaxScalerModel(
"""
return self._call_java("originalMin")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def originalMax(self) -> Vector:
"""
@@ -3329,7 +3329,7 @@ class OneHotEncoderModel(
"""
return self._set(handleInvalid=value)
- @property # type: ignore[misc]
+ @property
@since("2.3.0")
def categorySizes(self) -> List[int]:
"""
@@ -4007,7 +4007,7 @@ class RobustScalerModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def median(self) -> Vector:
"""
@@ -4015,7 +4015,7 @@ class RobustScalerModel(
"""
return self._call_java("median")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def range(self) -> Vector:
"""
@@ -4459,7 +4459,7 @@ class StandardScalerModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def std(self) -> Vector:
"""
@@ -4467,7 +4467,7 @@ class StandardScalerModel(
"""
return self._call_java("std")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def mean(self) -> Vector:
"""
@@ -4832,7 +4832,7 @@ class StringIndexerModel(
model.setHandleInvalid(handleInvalid)
return model
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def labels(self) -> List[str]:
"""
@@ -4843,7 +4843,7 @@ class StringIndexerModel(
"""
return self._call_java("labels")
- @property # type: ignore[misc]
+ @property
@since("3.0.2")
def labelsArray(self) -> List[str]:
"""
@@ -5627,7 +5627,7 @@ class VectorIndexerModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def numFeatures(self) -> int:
"""
@@ -5635,7 +5635,7 @@ class VectorIndexerModel(
"""
return self._call_java("numFeatures")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def categoryMaps(self) -> Dict[int, Tuple[float, int]]:
"""
@@ -6263,7 +6263,7 @@ class PCAModel(JavaModel, _PCAParams, JavaMLReadable["PCAModel"], JavaMLWritable
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def pc(self) -> DenseMatrix:
"""
@@ -6272,7 +6272,7 @@ class PCAModel(JavaModel, _PCAParams, JavaMLReadable["PCAModel"], JavaMLWritable
"""
return self._call_java("pc")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def explainedVariance(self) -> DenseVector:
"""
@@ -6729,7 +6729,7 @@ class _SelectorModel(JavaModel, _SelectorParams):
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def selectedFeatures(self) -> List[int]:
"""
@@ -7144,7 +7144,7 @@ class VarianceThresholdSelectorModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("3.1.0")
def selectedFeatures(self) -> List[int]:
"""
@@ -7419,7 +7419,7 @@ class UnivariateFeatureSelectorModel(
"""
return self._set(outputCol=value)
- @property # type: ignore[misc]
+ @property
@since("3.1.1")
def selectedFeatures(self) -> List[int]:
"""
diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
index 00f77c5a54a..cba4219a069 100644
--- a/python/pyspark/ml/fpm.py
+++ b/python/pyspark/ml/fpm.py
@@ -124,7 +124,7 @@ class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable["
"""
return self._set(predictionCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def freqItemsets(self) -> DataFrame:
"""
@@ -134,7 +134,7 @@ class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable["
"""
return self._call_java("freqItemsets")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def associationRules(self) -> DataFrame:
"""
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index f13fb721b9a..873140e51af 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -609,13 +609,13 @@ class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable["ALSMo
"""
return self._set(blockSize=value)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def rank(self) -> int:
"""rank of the matrix factorization model"""
return self._call_java("rank")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def userFactors(self) -> DataFrame:
"""
@@ -624,7 +624,7 @@ class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable["ALSMo
"""
return self._call_java("userFactors")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def itemFactors(self) -> DataFrame:
"""
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 8678ec3f31e..a4ce961c92e 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -453,7 +453,7 @@ class LinearRegressionModel(
.. versionadded:: 1.4.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def coefficients(self) -> Vector:
"""
@@ -461,7 +461,7 @@ class LinearRegressionModel(
"""
return self._call_java("coefficients")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def intercept(self) -> float:
"""
@@ -469,7 +469,7 @@ class LinearRegressionModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("2.3.0")
def scale(self) -> float:
r"""
@@ -477,7 +477,7 @@ class LinearRegressionModel(
"""
return self._call_java("scale")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def summary(self) -> "LinearRegressionTrainingSummary":
"""
@@ -517,7 +517,7 @@ class LinearRegressionSummary(JavaWrapper):
.. versionadded:: 2.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def predictions(self) -> DataFrame:
"""
@@ -525,7 +525,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("predictions")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def predictionCol(self) -> str:
"""
@@ -534,7 +534,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("predictionCol")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def labelCol(self) -> str:
"""
@@ -543,7 +543,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("labelCol")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def featuresCol(self) -> str:
"""
@@ -552,7 +552,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("featuresCol")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def explainedVariance(self) -> float:
r"""
@@ -571,7 +571,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("explainedVariance")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def meanAbsoluteError(self) -> float:
"""
@@ -587,7 +587,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("meanAbsoluteError")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def meanSquaredError(self) -> float:
"""
@@ -603,7 +603,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("meanSquaredError")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def rootMeanSquaredError(self) -> float:
"""
@@ -618,7 +618,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("rootMeanSquaredError")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def r2(self) -> float:
"""
@@ -635,7 +635,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("r2")
- @property # type: ignore[misc]
+ @property
@since("2.4.0")
def r2adj(self) -> float:
"""
@@ -651,7 +651,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("r2adj")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def residuals(self) -> DataFrame:
"""
@@ -659,7 +659,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("residuals")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def numInstances(self) -> int:
"""
@@ -667,7 +667,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("numInstances")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def degreesOfFreedom(self) -> int:
"""
@@ -675,7 +675,7 @@ class LinearRegressionSummary(JavaWrapper):
"""
return self._call_java("degreesOfFreedom")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def devianceResiduals(self) -> List[float]:
"""
@@ -977,7 +977,7 @@ class IsotonicRegressionModel(
"""
return self._set(featureIndex=value)
- @property # type: ignore[misc]
+ @property
@since("1.6.0")
def boundaries(self) -> Vector:
"""
@@ -985,7 +985,7 @@ class IsotonicRegressionModel(
"""
return self._call_java("boundaries")
- @property # type: ignore[misc]
+ @property
@since("1.6.0")
def predictions(self) -> Vector:
"""
@@ -994,7 +994,7 @@ class IsotonicRegressionModel(
"""
return self._call_java("predictions")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def numFeatures(self) -> int:
"""
@@ -1600,7 +1600,7 @@ class RandomForestRegressionModel(
.. versionadded:: 1.4.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def trees(self) -> List[DecisionTreeRegressionModel]:
"""Trees in this ensemble. Warning: These have null parent Estimators."""
@@ -1989,7 +1989,7 @@ class GBTRegressionModel(
"""
return self._call_java("featureImportances")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def trees(self) -> List[DecisionTreeRegressionModel]:
"""Trees in this ensemble. Warning: These have null parent Estimators."""
@@ -2308,7 +2308,7 @@ class AFTSurvivalRegressionModel(
"""
return self._set(quantilesCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def coefficients(self) -> Vector:
"""
@@ -2316,7 +2316,7 @@ class AFTSurvivalRegressionModel(
"""
return self._call_java("coefficients")
- @property # type: ignore[misc]
+ @property
@since("1.6.0")
def intercept(self) -> float:
"""
@@ -2324,7 +2324,7 @@ class AFTSurvivalRegressionModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("1.6.0")
def scale(self) -> float:
"""
@@ -2734,7 +2734,7 @@ class GeneralizedLinearRegressionModel(
"""
return self._set(linkPredictionCol=value)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def coefficients(self) -> Vector:
"""
@@ -2742,7 +2742,7 @@ class GeneralizedLinearRegressionModel(
"""
return self._call_java("coefficients")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def intercept(self) -> float:
"""
@@ -2750,7 +2750,7 @@ class GeneralizedLinearRegressionModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def summary(self) -> "GeneralizedLinearRegressionTrainingSummary":
"""
@@ -2792,7 +2792,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
.. versionadded:: 2.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def predictions(self) -> DataFrame:
"""
@@ -2800,7 +2800,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("predictions")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def predictionCol(self) -> str:
"""
@@ -2809,7 +2809,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("predictionCol")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def numInstances(self) -> int:
"""
@@ -2817,7 +2817,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("numInstances")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def rank(self) -> int:
"""
@@ -2825,7 +2825,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("rank")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def degreesOfFreedom(self) -> int:
"""
@@ -2833,7 +2833,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("degreesOfFreedom")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def residualDegreeOfFreedom(self) -> int:
"""
@@ -2841,7 +2841,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("residualDegreeOfFreedom")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def residualDegreeOfFreedomNull(self) -> int:
"""
@@ -2863,7 +2863,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("residuals", residualsType)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def nullDeviance(self) -> float:
"""
@@ -2871,7 +2871,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("nullDeviance")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def deviance(self) -> float:
"""
@@ -2879,7 +2879,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("deviance")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def dispersion(self) -> float:
"""
@@ -2890,7 +2890,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
return self._call_java("dispersion")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def aic(self) -> float:
"""
@@ -2907,7 +2907,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm
.. versionadded:: 2.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def numIterations(self) -> int:
"""
@@ -2915,7 +2915,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm
"""
return self._call_java("numIterations")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def solver(self) -> str:
"""
@@ -2923,7 +2923,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm
"""
return self._call_java("solver")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def coefficientStandardErrors(self) -> List[float]:
"""
@@ -2934,7 +2934,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm
"""
return self._call_java("coefficientStandardErrors")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def tValues(self) -> List[float]:
"""
@@ -2945,7 +2945,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm
"""
return self._call_java("tValues")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def pValues(self) -> List[float]:
"""
@@ -3280,7 +3280,7 @@ class FMRegressionModel(
.. versionadded:: 3.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def intercept(self) -> float:
"""
@@ -3288,7 +3288,7 @@ class FMRegressionModel(
"""
return self._call_java("intercept")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def linear(self) -> Vector:
"""
@@ -3296,7 +3296,7 @@ class FMRegressionModel(
"""
return self._call_java("linear")
- @property # type: ignore[misc]
+ @property
@since("3.0.0")
def factors(self) -> Matrix:
"""
diff --git a/python/pyspark/ml/tests/typing/test_clustering.yaml b/python/pyspark/ml/tests/typing/test_clustering.yaml
index b208573975d..bc50cf5b258 100644
--- a/python/pyspark/ml/tests/typing/test_clustering.yaml
+++ b/python/pyspark/ml/tests/typing/test_clustering.yaml
@@ -27,7 +27,7 @@
reveal_type(local_model)
reveal_type(local_model.setFeaturesCol("foo"))
out: |
- main:4: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*"
- main:5: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*"
+ main:4: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel"
+ main:5: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel"
main:8: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel"
- main:9: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel*"
+ main:9: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel"
diff --git a/python/pyspark/ml/tests/typing/test_evaluation.yml b/python/pyspark/ml/tests/typing/test_evaluation.yml
index a60166dfb96..51e076cf6bc 100644
--- a/python/pyspark/ml/tests/typing/test_evaluation.yml
+++ b/python/pyspark/ml/tests/typing/test_evaluation.yml
@@ -22,7 +22,7 @@
BinaryClassificationEvaluator().setMetricName("areaUnderROC")
BinaryClassificationEvaluator(metricName="areaUnderPR")
- BinaryClassificationEvaluator().setMetricName("foo") # E: Argument 1 to "setMetricName" of "BinaryClassificationEvaluator" has incompatible type "Literal['foo']"; expected "Union[Literal['areaUnderROC'], Literal['areaUnderPR']]" [arg-type]
- BinaryClassificationEvaluator(metricName="bar") # E: Argument "metricName" to "BinaryClassificationEvaluator" has incompatible type "Literal['bar']"; expected "Union[Literal['areaUnderROC'], Literal['areaUnderPR']]" [arg-type]
+ BinaryClassificationEvaluator().setMetricName("foo") # E: Argument 1 to "setMetricName" of "BinaryClassificationEvaluator" has incompatible type "Literal['foo']"; expected "Literal['areaUnderROC', 'areaUnderPR']" [arg-type]
+ BinaryClassificationEvaluator(metricName="bar") # E: Argument "metricName" to "BinaryClassificationEvaluator" has incompatible type "Literal['bar']"; expected "Literal['areaUnderROC', 'areaUnderPR']" [arg-type]
- reveal_type(BinaryClassificationEvaluator.load("foo")) # N: Revealed type is "pyspark.ml.evaluation.BinaryClassificationEvaluator*"
+ reveal_type(BinaryClassificationEvaluator.load("foo")) # N: Revealed type is "pyspark.ml.evaluation.BinaryClassificationEvaluator"
diff --git a/python/pyspark/ml/torch/distributor.py b/python/pyspark/ml/torch/distributor.py
index d40fbc61766..9f9636e6b10 100644
--- a/python/pyspark/ml/torch/distributor.py
+++ b/python/pyspark/ml/torch/distributor.py
@@ -767,8 +767,8 @@ class TorchDistributor(Distributor):
schema_file_path = os.path.join(save_dir, "schema.json")
schema_json_string = json.dumps(input_schema_json)
- with open(schema_file_path, "w") as f: # type:ignore
- f.write(schema_json_string) # type:ignore
+ with open(schema_file_path, "w") as f:
+ f.write(schema_json_string)
os.environ[SPARK_PARTITION_ARROW_DATA_FILE] = arrow_file_path
os.environ[SPARK_DATAFRAME_SCHEMA_FILE] = schema_file_path
@@ -959,7 +959,7 @@ class TorchDistributor(Distributor):
def _get_spark_partition_data_loader(
- num_samples: int, batch_size: int, num_workers: int = 1, prefetch_factor: Optional[int] = 2
+ num_samples: int, batch_size: int, num_workers: int = 1, prefetch_factor: int = 2
) -> Any:
"""
This function must be called inside the `train_function` where `train_function`
diff --git a/python/pyspark/ml/tree.py b/python/pyspark/ml/tree.py
index ad405b742bd..5143c3214b6 100644
--- a/python/pyspark/ml/tree.py
+++ b/python/pyspark/ml/tree.py
@@ -46,19 +46,19 @@ class _DecisionTreeModel(JavaPredictionModel[T]):
.. versionadded:: 1.5.0
"""
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def numNodes(self) -> int:
"""Return number of nodes of the decision tree."""
return self._call_java("numNodes")
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def depth(self) -> int:
"""Return depth of the decision tree."""
return self._call_java("depth")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def toDebugString(self) -> str:
"""Full description of model."""
@@ -218,31 +218,31 @@ class _TreeEnsembleModel(JavaPredictionModel[T]):
Represents a tree ensemble model.
"""
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def trees(self) -> Sequence["_DecisionTreeModel"]:
"""Trees in this ensemble. Warning: These have null parent Estimators."""
return [_DecisionTreeModel(m) for m in list(self._call_java("trees"))]
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def getNumTrees(self) -> int:
"""Number of trees in ensemble."""
return self._call_java("getNumTrees")
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def treeWeights(self) -> List[float]:
"""Return the weights for each tree"""
return list(self._call_java("javaTreeWeights"))
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def totalNumNodes(self) -> int:
"""Total number of nodes, summed over all trees in the ensemble."""
return self._call_java("totalNumNodes")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def toDebugString(self) -> str:
"""Full description of model."""
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 0dabcdd7f27..63f51229a9f 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -461,7 +461,7 @@ class _ValidatorSharedReadWrite:
evaluator = instance.getEvaluator()
uidMap = MetaAlgorithmReadWrite.getUidMap(estiamtor)
- for elem in [evaluator] + list(uidMap.values()): # type: ignore[arg-type]
+ for elem in [evaluator] + list(uidMap.values()):
if not isinstance(elem, MLWritable):
raise ValueError(
f"Validator write will fail because it contains {elem.uid} "
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 74ce8162d18..2c90ff3cb7b 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -659,7 +659,7 @@ class HasTrainingSummary(Generic[T]):
.. versionadded:: 3.0.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def hasSummary(self) -> bool:
"""
@@ -668,7 +668,7 @@ class HasTrainingSummary(Generic[T]):
"""
return cast("JavaWrapper", self)._call_java("hasSummary")
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def summary(self) -> T:
"""
diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py
index a83ed4c3d4b..5eee3eeef11 100644
--- a/python/pyspark/ml/wrapper.py
+++ b/python/pyspark/ml/wrapper.py
@@ -268,7 +268,7 @@ class JavaParams(JavaWrapper, Params, metaclass=ABCMeta):
return self._java_obj
@staticmethod
- def _from_java(java_stage: "JavaObject") -> "JP":
+ def _from_java(java_stage: "JavaObject") -> "JP": # type: ignore
"""
Given a Java object, create and return a Python wrapper of it.
Used for ML persistence.
@@ -449,7 +449,7 @@ class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams):
(Private) Java Model for prediction tasks (regression and classification).
"""
- @property # type: ignore[misc]
+ @property
@since("2.1.0")
def numFeatures(self) -> int:
"""
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 1a3b3581e96..1e1795d9fb3 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -73,7 +73,7 @@ class LinearClassificationModel(LinearModel):
"""
self._threshold = value
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def threshold(self) -> Optional[float]:
"""
@@ -214,7 +214,7 @@ class LogisticRegressionModel(LinearClassificationModel):
self._numClasses - 1, self._dataWithBiasSize
)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def numFeatures(self) -> int:
"""
@@ -222,7 +222,7 @@ class LogisticRegressionModel(LinearClassificationModel):
"""
return self._numFeatures
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def numClasses(self) -> int:
"""
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 89210a8e0a4..4595268edc6 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -79,14 +79,14 @@ class BisectingKMeansModel(JavaModelWrapper):
super(BisectingKMeansModel, self).__init__(java_model)
self.centers = [c.toArray() for c in self.call("clusterCenters")]
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def clusterCenters(self) -> List[np.ndarray]:
"""Get the cluster centers, represented as a list of NumPy
arrays."""
return self.centers
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def k(self) -> int:
"""Get the number of clusters"""
@@ -281,13 +281,13 @@ class KMeansModel(Saveable, Loader["KMeansModel"]):
def __init__(self, centers: List["VectorLike"]):
self.centers = centers
- @property # type: ignore[misc]
+ @property
@since("1.0.0")
def clusterCenters(self) -> List["VectorLike"]:
"""Get the cluster centers, represented as a list of NumPy arrays."""
return self.centers
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def k(self) -> int:
"""Total number of clusters."""
@@ -532,7 +532,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM
True
"""
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def weights(self) -> np.ndarray:
"""
@@ -541,7 +541,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM
"""
return array(self.call("weights"))
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def gaussians(self) -> List[MultivariateGaussian]:
"""
@@ -552,7 +552,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM
MultivariateGaussian(gaussian[0], gaussian[1]) for gaussian in self.call("gaussians")
]
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def k(self) -> int:
"""Number of gaussians in mixture."""
@@ -778,7 +778,7 @@ class PowerIterationClusteringModel(
... pass
"""
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def k(self) -> int:
"""
@@ -946,7 +946,7 @@ class StreamingKMeansModel(KMeansModel):
super(StreamingKMeansModel, self).__init__(centers=clusterCenters)
self._clusterWeights = list(clusterWeights) # type: ignore[arg-type]
- @property # type: ignore[misc]
+ @property
@since("1.5.0")
def clusterWeights(self) -> List[np.float64]:
"""Return the cluster weights."""
diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py
index cee61a1b241..2a8991df050 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -85,7 +85,7 @@ class BinaryClassificationMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(BinaryClassificationMetrics, self).__init__(java_model)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def areaUnderROC(self) -> float:
"""
@@ -94,7 +94,7 @@ class BinaryClassificationMetrics(JavaModelWrapper):
"""
return self.call("areaUnderROC")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def areaUnderPR(self) -> float:
"""
@@ -161,7 +161,7 @@ class RegressionMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(RegressionMetrics, self).__init__(java_model)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def explainedVariance(self) -> float:
r"""
@@ -170,7 +170,7 @@ class RegressionMetrics(JavaModelWrapper):
"""
return self.call("explainedVariance")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def meanAbsoluteError(self) -> float:
"""
@@ -179,7 +179,7 @@ class RegressionMetrics(JavaModelWrapper):
"""
return self.call("meanAbsoluteError")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def meanSquaredError(self) -> float:
"""
@@ -188,7 +188,7 @@ class RegressionMetrics(JavaModelWrapper):
"""
return self.call("meanSquaredError")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def rootMeanSquaredError(self) -> float:
"""
@@ -197,7 +197,7 @@ class RegressionMetrics(JavaModelWrapper):
"""
return self.call("rootMeanSquaredError")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def r2(self) -> float:
"""
@@ -348,7 +348,7 @@ class MulticlassMetrics(JavaModelWrapper):
else:
return self.call("fMeasure", label, beta)
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def accuracy(self) -> float:
"""
@@ -357,7 +357,7 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("accuracy")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def weightedTruePositiveRate(self) -> float:
"""
@@ -366,7 +366,7 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("weightedTruePositiveRate")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def weightedFalsePositiveRate(self) -> float:
"""
@@ -374,7 +374,7 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("weightedFalsePositiveRate")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def weightedRecall(self) -> float:
"""
@@ -383,7 +383,7 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("weightedRecall")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def weightedPrecision(self) -> float:
"""
@@ -482,7 +482,7 @@ class RankingMetrics(JavaModelWrapper, Generic[T]):
"""
return self.call("precisionAt", int(k))
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def meanAveragePrecision(self) -> float:
"""
@@ -614,7 +614,7 @@ class MultilabelMetrics(JavaModelWrapper):
else:
return self.call("f1Measure", float(label))
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def microPrecision(self) -> float:
"""
@@ -623,7 +623,7 @@ class MultilabelMetrics(JavaModelWrapper):
"""
return self.call("microPrecision")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def microRecall(self) -> float:
"""
@@ -632,7 +632,7 @@ class MultilabelMetrics(JavaModelWrapper):
"""
return self.call("microRecall")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def microF1Measure(self) -> float:
"""
@@ -641,7 +641,7 @@ class MultilabelMetrics(JavaModelWrapper):
"""
return self.call("microF1Measure")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def hammingLoss(self) -> float:
"""
@@ -649,7 +649,7 @@ class MultilabelMetrics(JavaModelWrapper):
"""
return self.call("hammingLoss")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def subsetAccuracy(self) -> float:
"""
@@ -658,7 +658,7 @@ class MultilabelMetrics(JavaModelWrapper):
"""
return self.call("subsetAccuracy")
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def accuracy(self) -> float:
"""
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 17dab6ac057..2a8cb7d8df3 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -251,7 +251,7 @@ class StandardScalerModel(JavaVectorTransformer):
self.call("setWithStd", withStd)
return self
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def withStd(self) -> bool:
"""
@@ -259,7 +259,7 @@ class StandardScalerModel(JavaVectorTransformer):
"""
return self.call("withStd")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def withMean(self) -> bool:
"""
@@ -267,7 +267,7 @@ class StandardScalerModel(JavaVectorTransformer):
"""
return self.call("withMean")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def std(self) -> Vector:
"""
@@ -275,7 +275,7 @@ class StandardScalerModel(JavaVectorTransformer):
"""
return self.call("std")
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def mean(self) -> Vector:
"""
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 4dcd0c97d89..f752f5458ff 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -1617,7 +1617,7 @@ class QRDecomposition(Generic[QT, RT]):
self._Q = Q
self._R = R
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def Q(self) -> QT:
"""
@@ -1626,7 +1626,7 @@ class QRDecomposition(Generic[QT, RT]):
"""
return self._Q
- @property # type: ignore[misc]
+ @property
@since("2.0.0")
def R(self) -> RT:
"""
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 1a2e38f81e7..ecdb4e75ed4 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -500,7 +500,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
.. versionadded:: 2.2.0
"""
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def U(self) -> Optional[UT]: # type: ignore[return]
"""
@@ -517,7 +517,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
else:
raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" % mat_name)
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def s(self) -> Vector:
"""
@@ -525,7 +525,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
"""
return self.call("s")
- @property # type: ignore[misc]
+ @property
@since("2.2.0")
def V(self) -> VT:
"""
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 55eae10893e..7ff8fddf88d 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -208,7 +208,7 @@ class MatrixFactorizationModel(
"""
return self.call("wrappedRecommendUsersForProducts", num)
- @property # type: ignore[misc]
+ @property
@since("1.4.0")
def rank(self) -> int:
"""Rank for the features in this model"""
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 18f37b4a71a..cac3294ade6 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -117,13 +117,13 @@ class LinearModel:
self._coeff = _convert_to_vector(weights)
self._intercept = float(intercept)
- @property # type: ignore[misc]
+ @property
@since("1.0.0")
def weights(self) -> Vector:
"""Weights computed for every feature."""
return self._coeff
- @property # type: ignore[misc]
+ @property
@since("1.0.0")
def intercept(self) -> float:
"""Intercept computed for this model."""
diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py
index 67bb1f36305..686b036bb9e 100644
--- a/python/pyspark/sql/observation.py
+++ b/python/pyspark/sql/observation.py
@@ -112,7 +112,7 @@ class Observation:
return DataFrame(observed_df, df.sparkSession)
# Note that decorated property only works with Python 3.9+ which Spark Connect requires.
- @property # type: ignore[misc]
+ @property
@try_remote_observation
def get(self) -> Dict[str, Any]:
"""Get the observed metrics.
diff --git a/python/pyspark/sql/tests/typing/test_dataframe.yml b/python/pyspark/sql/tests/typing/test_dataframe.yml
index 79a3bcd8dfc..d32a09cea82 100644
--- a/python/pyspark/sql/tests/typing/test_dataframe.yml
+++ b/python/pyspark/sql/tests/typing/test_dataframe.yml
@@ -123,9 +123,9 @@
out: |
main:10: error: No overload variant of "drop" of "DataFrame" matches argument types "Column", "Column" [call-overload]
- main:10: note: Possible overload variant:
+ main:10: note: Possible overload variants:
+ main:10: note: def drop(self, cols: Union[Column, str]) -> DataFrame
main:10: note: def drop(self, *cols: str) -> DataFrame
- main:10: note: <1 more non-matching overload not shown>
- case: fillNullValues
diff --git a/python/pyspark/sql/tests/typing/test_functions.yml b/python/pyspark/sql/tests/typing/test_functions.yml
index efb3293472d..6c80420bf0a 100644
--- a/python/pyspark/sql/tests/typing/test_functions.yml
+++ b/python/pyspark/sql/tests/typing/test_functions.yml
@@ -68,34 +68,34 @@
out: |
main:29: error: No overload variant of "array" matches argument types "List[Column]", "List[Column]" [call-overload]
- main:29: note: Possible overload variant:
+ main:29: note: Possible overload variants:
main:29: note: def array(*cols: Union[Column, str]) -> Column
- main:29: note: <1 more non-matching overload not shown>
+ main:29: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:30: error: No overload variant of "create_map" matches argument types "List[Column]", "List[Column]" [call-overload]
- main:30: note: Possible overload variant:
+ main:30: note: Possible overload variants:
main:30: note: def create_map(*cols: Union[Column, str]) -> Column
- main:30: note: <1 more non-matching overload not shown>
+ main:30: note: def [ColumnOrName_] create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:31: error: No overload variant of "map_concat" matches argument types "List[Column]", "List[Column]" [call-overload]
- main:31: note: Possible overload variant:
+ main:31: note: Possible overload variants:
main:31: note: def map_concat(*cols: Union[Column, str]) -> Column
- main:31: note: <1 more non-matching overload not shown>
+ main:31: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:32: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload]
- main:32: note: Possible overload variant:
+ main:32: note: Possible overload variants:
main:32: note: def struct(*cols: Union[Column, str]) -> Column
- main:32: note: <1 more non-matching overload not shown>
+ main:32: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:33: error: No overload variant of "array" matches argument types "List[str]", "List[str]" [call-overload]
- main:33: note: Possible overload variant:
+ main:33: note: Possible overload variants:
main:33: note: def array(*cols: Union[Column, str]) -> Column
- main:33: note: <1 more non-matching overload not shown>
+ main:33: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:34: error: No overload variant of "create_map" matches argument types "List[str]", "List[str]" [call-overload]
- main:34: note: Possible overload variant:
+ main:34: note: Possible overload variants:
main:34: note: def create_map(*cols: Union[Column, str]) -> Column
- main:34: note: <1 more non-matching overload not shown>
+ main:34: note: def [ColumnOrName_] create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:35: error: No overload variant of "map_concat" matches argument types "List[str]", "List[str]" [call-overload]
- main:35: note: Possible overload variant:
+ main:35: note: Possible overload variants:
main:35: note: def map_concat(*cols: Union[Column, str]) -> Column
- main:35: note: <1 more non-matching overload not shown>
+ main:35: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:36: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload]
- main:36: note: Possible overload variant:
+ main:36: note: Possible overload variants:
main:36: note: def struct(*cols: Union[Column, str]) -> Column
- main:36: note: <1 more non-matching overload not shown>
+ main:36: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
diff --git a/python/pyspark/sql/tests/typing/test_session.yml b/python/pyspark/sql/tests/typing/test_session.yml
index 70d0001c47c..5c9fd9f197a 100644
--- a/python/pyspark/sql/tests/typing/test_session.yml
+++ b/python/pyspark/sql/tests/typing/test_session.yml
@@ -97,7 +97,12 @@
main:18: note: Possible overload variants:
main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[List[str], Tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame
main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[List[str], Tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame
- main:18: note: <6 more non-matching overloads not shown>
+ main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) -> DataFrame
+ main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) -> DataFrame
+ main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: RDD[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame
+ main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: Iterable[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame
+ main:18: note: def createDataFrame(self, data: DataFrame, samplingRatio: Optional[float] = ...) -> DataFrame
+ main:18: note: def createDataFrame(self, data: DataFrame, schema: Union[StructType, str], verifySchema: bool = ...) -> DataFrame
- case: createDataFrameFromEmptyRdd
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index acc3f407f9d..db615d339b5 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -146,7 +146,7 @@ class DataTypeSingleton(type):
_instances: ClassVar[Dict[Type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
- def __call__(cls: Type[T]) -> T: # type: ignore[override]
+ def __call__(cls: Type[T]) -> T:
if cls not in cls._instances: # type: ignore[attr-defined]
cls._instances[cls] = super( # type: ignore[misc, attr-defined]
DataTypeSingleton, cls
diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py
index ffebf99685d..84e7cd7fcc6 100644
--- a/python/pyspark/streaming/context.py
+++ b/python/pyspark/streaming/context.py
@@ -381,13 +381,13 @@ class StreamingContext:
Changes to the queue after the stream is created will not be recognized.
"""
if default and not isinstance(default, RDD):
- default = self._sc.parallelize(default) # type: ignore[arg-type]
+ default = self._sc.parallelize(default)
if not rdds and default:
rdds = [rdds] # type: ignore[list-item]
if rdds and not isinstance(rdds[0], RDD):
- rdds = [self._sc.parallelize(input) for input in rdds] # type: ignore[arg-type]
+ rdds = [self._sc.parallelize(input) for input in rdds]
self._check_serializers(rdds)
assert self._jvm is not None
diff --git a/python/pyspark/tests/typing/test_rdd.yml b/python/pyspark/tests/typing/test_rdd.yml
index 48965829cfd..358553327df 100644
--- a/python/pyspark/tests/typing/test_rdd.yml
+++ b/python/pyspark/tests/typing/test_rdd.yml
@@ -100,8 +100,8 @@
reveal_type(sc.parallelize([("a", 1)]).aggregateByKey(zero, seq_func, comb_func))
out: |
- main:11: note: Revealed type is "pyspark.rdd.RDD[builtins.str*]"
- main:16: note: Revealed type is "pyspark.rdd.RDD[builtins.int*]"
+ main:11: note: Revealed type is "pyspark.rdd.RDD[builtins.str]"
+ main:16: note: Revealed type is "pyspark.rdd.RDD[builtins.int]"
main:18: note: Revealed type is "pyspark.rdd.RDD[Tuple[builtins.str, builtins.int]]"
main:20: note: Revealed type is "Tuple[builtins.str, builtins.int]"
main:22: note: Revealed type is "builtins.int"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org