You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by da...@apache.org on 2023/02/28 14:50:04 UTC
[beam] 01/01: Fix tensorflowhub caching issue
This is an automated email from the ASF dual-hosted git repository.
damccorm pushed a commit to branch users/damccorm/tfhub-test
in repository https://gitbox.apache.org/repos/asf/beam.git
commit 167ace851fc250b980576f7812ae55519a609450
Author: Danny McCormick <da...@google.com>
AuthorDate: Tue Feb 28 09:49:25 2023 -0500
Fix tensorflowhub caching issue
---
.../ml/inference/tensorflow_inference_it_test.py | 23 ++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py
index fb1a2964841..4e044082ac0 100644
--- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py
@@ -25,10 +25,12 @@ import pytest
from apache_beam.io.filesystems import FileSystems
from apache_beam.testing.test_pipeline import TestPipeline
+from pathlib import Path
# pylint: disable=ungrouped-imports
try:
import tensorflow as tf
+ import tensorflow_hub as hub
from apache_beam.examples.inference import tensorflow_imagenet_segmentation
from apache_beam.examples.inference import tensorflow_mnist_classification
from apache_beam.examples.inference import tensorflow_mnist_with_weights
@@ -42,6 +44,26 @@ def process_outputs(filepath):
lines = [l.decode('utf-8').strip('\n') for l in lines]
return lines
def rmdir(directory):
  """Recursively delete *directory* and everything beneath it.

  Accepts a str or Path. Unlike a naive recursive walk, symlinks are
  unlinked rather than followed: ``Path.is_dir()`` follows symlinks, so
  recursing into one would delete the link *target's* contents and then
  fail with NotADirectoryError when ``rmdir()`` is called on the link.

  Args:
    directory: path of the directory tree to remove.

  Raises:
    OSError: if any entry cannot be removed.
  """
  directory = Path(directory)
  for item in directory.iterdir():
    # Only recurse into real subdirectories; remove symlinks as plain
    # entries so we never touch anything outside *directory*.
    if item.is_dir() and not item.is_symlink():
      rmdir(item)
    else:
      item.unlink()
  directory.rmdir()
+
def clear_tf_hub_temp_dir(model_path):
  """Remove the tensorflow_hub on-disk cache entry for *model_path*.

  ``hub.resolve`` downloads a hub model into a temporary directory that
  can persist between test runs. If the cached model files are later
  deleted while the cache marker survives, ``hub.resolve`` still no-ops
  and ``tf.keras.models.load_model`` raises. Deleting the whole cached
  directory between runs keeps the test self-contained and robust.

  Args:
    model_path: tfhub.dev model handle/URL to evict from the local cache.
  """
  cached_dir = hub.resolve(model_path)
  rmdir(cached_dir)
+
+
@unittest.skipIf(
tf is None, 'Missing dependencies. '
@@ -90,6 +112,7 @@ class TensorflowInference(unittest.TestCase):
output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
model_path = (
'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4')
+ clear_tf_hub_temp_dir(model_path)
extra_opts = {
'input': input_file,
'output': output_file,