Posted to commits@beam.apache.org by tv...@apache.org on 2022/05/04 08:14:07 UTC
[beam] branch master updated: add __Init__ to inference. (#17514)
This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 731237719a6 add __Init__ to inference. (#17514)
731237719a6 is described below
commit 731237719a6219e923c3b09a36915923dd417551
Author: Anand Inguva <34...@users.noreply.github.com>
AuthorDate: Wed May 4 08:13:55 2022 +0000
add __Init__ to inference. (#17514)
Co-authored-by: Andy Ye <an...@gmail.com>
---
 sdks/python/apache_beam/ml/inference/__init__.py  | 16 ++++++++++++++++
 sdks/python/apache_beam/ml/inference/api.py       |  3 ++-
 sdks/python/apache_beam/ml/inference/base.py      | 11 ++++++-----
 sdks/python/apache_beam/ml/inference/base_test.py |  2 +-
 sdks/python/apache_beam/ml/inference/pytorch.py   | 19 +++++++++++--------
 .../apache_beam/ml/inference/sklearn_inference.py | 11 ++++++-----
 .../ml/inference/sklearn_inference_test.py        |  6 +++---
 sdks/python/scripts/generate_pydoc.sh             | 11 -----------
 sdks/python/tox.ini                               |  1 +
 9 files changed, 46 insertions(+), 34 deletions(-)
diff --git a/sdks/python/apache_beam/ml/inference/__init__.py b/sdks/python/apache_beam/ml/inference/__init__.py
new file mode 100644
index 00000000000..cce3acad34a
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
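The new file contains only the Apache license header; its purpose is structural. Adding __init__.py marks apache_beam/ml/inference as a regular Python package, so the modules touched by this commit can be imported through the package path. A minimal sketch of the imports this enables:

    # With the package marker in place, the inference modules resolve
    # through normal package imports:
    from apache_beam.ml.inference import base
    from apache_beam.ml.inference.api import RunInference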
diff --git a/sdks/python/apache_beam/ml/inference/api.py b/sdks/python/apache_beam/ml/inference/api.py
index 73dd87bb4e9..10a5a306704 100644
--- a/sdks/python/apache_beam/ml/inference/api.py
+++ b/sdks/python/apache_beam/ml/inference/api.py
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# mypy: ignore-errors
from dataclasses import dataclass
from typing import Tuple
@@ -51,7 +52,7 @@ class RunInference(beam.PTransform):
TODO(BEAM-14046): Add and link to help documentation
"""
- def __init__(self, model_loader: base.ModelLoader) -> beam.pvalue.PCollection:
+ def __init__(self, model_loader: base.ModelLoader):
self._model_loader = model_loader
def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
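The removed annotation was simply incorrect: __init__ always returns None, so a ``-> beam.pvalue.PCollection`` return type belongs on expand(), not on the constructor. A minimal sketch of the corrected pattern, using a hypothetical ExampleTransform rather than the real RunInference body:

    import apache_beam as beam

    class ExampleTransform(beam.PTransform):
      # The constructor implicitly returns None; annotating it with a
      # PCollection return type is wrong and trips type checkers.
      def __init__(self, model_loader):
        self._model_loader = model_loader

      # PCollection-in/PCollection-out typing belongs here instead.
      def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
        return pcoll | beam.Map(lambda x: x)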
diff --git a/sdks/python/apache_beam/ml/inference/base.py b/sdks/python/apache_beam/ml/inference/base.py
index 80490285edd..3c7e6fe0e3d 100644
--- a/sdks/python/apache_beam/ml/inference/base.py
+++ b/sdks/python/apache_beam/ml/inference/base.py
@@ -44,10 +44,10 @@ import apache_beam as beam
from apache_beam.utils import shared
try:
- # pylint: disable=g-import-not-at-top
+ # pylint: disable=wrong-import-order, wrong-import-position
import resource
except ImportError:
- resource = None
+ resource = None # type: ignore[assignment]
_MICROSECOND_TO_MILLISECOND = 1000
_NANOSECOND_TO_MICROSECOND = 1000
@@ -59,7 +59,8 @@ T = TypeVar('T')
class InferenceRunner():
"""Implements running inferences for a framework."""
def run_inference(self, batch: List[Any], model: Any) -> Iterable[Any]:
- """Runs inferences on a batch of examples and returns an Iterable of Predictions."""
+ """Runs inferences on a batch of examples and
+ returns an Iterable of Predictions."""
raise NotImplementedError(type(self))
def get_num_bytes(self, batch: Any) -> int:
@@ -67,7 +68,7 @@ class InferenceRunner():
return len(pickle.dumps(batch))
def get_metrics_namespace(self) -> str:
- """Returns a namespace for metrics collected by the RunInference transform."""
+ """Returns a namespace for metrics collected by RunInference transform."""
return 'RunInference'
@@ -249,7 +250,7 @@ class _Clock(object):
class _FineGrainedClock(_Clock):
def get_current_time_in_microseconds(self) -> int:
return int(
- time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr
+ time.clock_gettime_ns(time.CLOCK_REALTIME) / # type: ignore[attr-defined]
_NANOSECOND_TO_MICROSECOND)
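Both changes here are typing fixes: the resource module is POSIX-only, so the None fallback needs a mypy-style ignore (assigning None to a name mypy knows as a module), and time.clock_gettime_ns gets the equivalent mypy ignore on platforms where the attribute is unknown. A standalone sketch of the guarded-import pattern; the memory_usage helper is hypothetical, not part of the commit:

    try:
      import resource  # POSIX-only; absent on Windows
    except ImportError:
      resource = None  # type: ignore[assignment]

    def memory_usage():
      # Hypothetical helper: callers must handle the None fallback.
      if resource is None:
        return None
      return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss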
diff --git a/sdks/python/apache_beam/ml/inference/base_test.py b/sdks/python/apache_beam/ml/inference/base_test.py
index ab7bb16383e..55936f63ed4 100644
--- a/sdks/python/apache_beam/ml/inference/base_test.py
+++ b/sdks/python/apache_beam/ml/inference/base_test.py
@@ -23,8 +23,8 @@ from typing import Any
from typing import Iterable
import apache_beam as beam
-import apache_beam.ml.inference.base as base
from apache_beam.metrics.metric import MetricsFilter
+from apache_beam.ml.inference import base
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
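Both import spellings bind the same module object; the ``from`` form simply matches the style of the surrounding imports and sorts cleanly with them. As a quick illustration:

    from apache_beam.ml.inference import base
    assert base.__name__ == 'apache_beam.ml.inference.base'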
diff --git a/sdks/python/apache_beam/ml/inference/pytorch.py b/sdks/python/apache_beam/ml/inference/pytorch.py
index cb0935e3a3a..1eac09af3a7 100644
--- a/sdks/python/apache_beam/ml/inference/pytorch.py
+++ b/sdks/python/apache_beam/ml/inference/pytorch.py
@@ -49,10 +49,10 @@ class PytorchInferenceRunner(InferenceRunner):
the inference call.
"""
- batch = torch.stack(batch)
- if batch.device != self._device:
- batch = batch.to(self._device)
- predictions = model(batch)
+ torch_batch = torch.stack(batch)
+ if torch_batch.device != self._device:
+ torch_batch = torch_batch.to(self._device)
+ predictions = model(torch_batch)
return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
def get_num_bytes(self, batch: List[torch.Tensor]) -> int:
@@ -75,10 +75,13 @@ class PytorchModelLoader(ModelLoader):
model_params: Dict[str, Any],
device: str = 'CPU'):
"""
- state_dict_path: path to the saved dictionary of the model state.
- model_class: class of the Pytorch model that defines the model structure.
- device: the device on which you wish to run the model. If ``device = GPU``
- then device will be cuda if it is available. Otherwise, it will be cpu.
+ Initializes a PytorchModelLoader
+ :param state_dict_path: path to the saved dictionary of the model state.
+ :param model_class: class of the Pytorch model that defines the model
+ structure.
+ :param device: the device on which you wish to run the model. If
+ ``device = GPU`` then a GPU device will be used if it is available.
+ Otherwise, it will be CPU.
See https://pytorch.org/tutorials/beginner/saving_loading_models.html
for details
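The rename from batch to torch_batch is more than cosmetic: previously the input list was rebound to the stacked (and possibly device-moved) tensor, so zip(batch, predictions) iterated over rows of that tensor rather than over the original input tensors. Keeping the original list intact pairs each input element with its prediction. A simplified, standalone sketch of the pattern, with plain tuples standing in for PredictionResult:

    import torch

    def run_inference(batch, model, device):
      # Stack into a separate name so the original list survives.
      torch_batch = torch.stack(batch)
      if torch_batch.device != device:
        torch_batch = torch_batch.to(device)
      predictions = model(torch_batch)
      # Pair each original input tensor with its prediction.
      return [(x, y) for x, y in zip(batch, predictions)]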
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference.py b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
index e0890a725c5..3b56eada881 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
@@ -42,14 +42,14 @@ class ModelFileType(enum.Enum):
class SklearnInferenceRunner(InferenceRunner):
- def run_inference(self, batch: List[numpy.array],
- model: Any) -> Iterable[numpy.array]:
+ def run_inference(self, batch: List[numpy.ndarray],
+ model: Any) -> Iterable[PredictionResult]:
# vectorize data for better performance
vectorized_batch = numpy.stack(batch, axis=0)
predictions = model.predict(vectorized_batch)
return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
- def get_num_bytes(self, batch: List[numpy.array]) -> int:
+ def get_num_bytes(self, batch: List[numpy.ndarray]) -> int:
"""Returns the number of bytes of data for a batch."""
return sum(sys.getsizeof(element) for element in batch)
@@ -71,8 +71,9 @@ class SklearnModelLoader(ModelLoader):
elif self._model_file_type == ModelFileType.JOBLIB:
if not joblib:
raise ImportError(
- 'Could not import joblib in this execution'
- ' environment. For help with managing dependencies on Python workers see https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/'
+ 'Could not import joblib in this execution environment. '
'For help with managing dependencies on Python workers, '
+ 'see https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/' # pylint: disable=line-too-long
)
return joblib.load(file)
raise AssertionError('Unsupported serialization type.')
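The annotation changes from numpy.array to numpy.ndarray fix a common mix-up: numpy.array is a factory function, not a type, so it is meaningless in a type hint, while numpy.ndarray is the actual array class. A quick demonstration:

    import numpy

    arr = numpy.array([1, 2, 3])
    assert isinstance(arr, numpy.ndarray)      # ndarray is the type
    assert not isinstance(numpy.array, type)   # array is a function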
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
index a35a6922957..f1efe73c96c 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
@@ -30,8 +30,8 @@ import numpy
from sklearn import svm
import apache_beam as beam
-import apache_beam.ml.inference.api as api
-import apache_beam.ml.inference.base as base
+from apache_beam.ml.inference import api
+from apache_beam.ml.inference import base
from apache_beam.ml.inference.sklearn_inference import ModelFileType
from apache_beam.ml.inference.sklearn_inference import SklearnInferenceRunner
from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
@@ -49,7 +49,7 @@ class FakeModel:
def __init__(self):
self.total_predict_calls = 0
- def predict(self, input_vector: numpy.array):
+ def predict(self, input_vector: numpy.ndarray):
self.total_predict_calls += 1
return numpy.sum(input_vector, axis=1)
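The FakeModel stub mirrors just enough of the sklearn predict() interface for the tests: it counts invocations and returns deterministic row sums. A hedged usage sketch:

    model = FakeModel()
    result = model.predict(numpy.array([[1, 2], [3, 4]]))
    # result == numpy.array([3, 7]); model.total_predict_calls == 1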
diff --git a/sdks/python/scripts/generate_pydoc.sh b/sdks/python/scripts/generate_pydoc.sh
index 7890ecd9665..cb86043b34a 100755
--- a/sdks/python/scripts/generate_pydoc.sh
+++ b/sdks/python/scripts/generate_pydoc.sh
@@ -235,17 +235,6 @@ nitpick_ignore = []
nitpick_ignore += [('py:class', iden) for iden in ignore_identifiers]
nitpick_ignore += [('py:obj', iden) for iden in ignore_identifiers]
nitpick_ignore += [('py:exc', iden) for iden in ignore_references]
-
-# Monkey patch functools.wraps to retain original function argument signature
-# for documentation.
-# https://github.com/sphinx-doc/sphinx/issues/1711
-import functools
-def fake_wraps(wrapped):
- def wrapper(decorator):
- return wrapped
- return wrapper
-
-functools.wraps = fake_wraps
EOF
#=== index.rst ===#
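The deleted block monkey-patched functools.wraps with a version that returns the wrapped function itself, discarding the wrapper, so that Sphinx documented the original argument signature (the linked Sphinx issue describes decorators leaking a generic *args/**kwargs signature into the docs). A sketch of the situation the patch worked around:

    import functools

    def deco(func):
      @functools.wraps(func)
      def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
      return wrapper

    @deco
    def add(x: int, y: int) -> int:
      return x + y

    # Older Sphinx versions could pick up wrapper's (*args, **kwargs)
    # signature here; fake_wraps avoided that by handing Sphinx the
    # undecorated function directly.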
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 858db8590a8..14b94291cfa 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -144,6 +144,7 @@ deps =
sphinx_rtd_theme==0.4.3
docutils<0.18
Jinja2==3.0.3 # TODO(BEAM-14172): Sphinx version is too old.
+ torch
commands =
time {toxinidir}/scripts/generate_pydoc.sh
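The tox change supports the docs build: Sphinx autodoc imports every module it documents, and the new apache_beam.ml.inference.pytorch module needs torch at import time, so the docs environment now installs it. Roughly what the docs build must be able to do:

    # generate_pydoc.sh can only document this module if the import
    # succeeds, which requires torch in the docs environment:
    import apache_beam.ml.inference.pytorch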