You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2016/08/10 20:49:02 UTC

[1/3] incubator-beam git commit: fixup! updates an error message and adds a test for error path.

Repository: incubator-beam
Updated Branches:
  refs/heads/python-sdk 127e763c1 -> 450b64754


fixup! updates an error message and adds a test for error path.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/04c650f8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/04c650f8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/04c650f8

Branch: refs/heads/python-sdk
Commit: 04c650f88b661b6af056eaf4d16795b180a3df8c
Parents: d8a76a5
Author: Chamikara Jayalath <ch...@google.com>
Authored: Mon Aug 8 16:33:43 2016 -0700
Committer: Chamikara Jayalath <ch...@google.com>
Committed: Mon Aug 8 16:35:25 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/fileio.py     |  4 ++--
 sdks/python/apache_beam/io/gcsio_test.py | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/04c650f8/sdks/python/apache_beam/io/fileio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py
index ecacb9f..9b98a43 100644
--- a/sdks/python/apache_beam/io/fileio.py
+++ b/sdks/python/apache_beam/io/fileio.py
@@ -533,8 +533,8 @@ class FileSink(iobase.Sink):
         try:
           exists = channel_factory.exists(final_name)
         except Exception as exists_e:  # pylint: disable=broad-except
-          logging.warning('Exception when invoking channel_factory.exists(): '
-                          '%s', exists_e)
+          logging.warning('Exception when checking if file %s exists: '
+                          '%s', final_name, exists_e)
           # Returning original exception after logging the exception from
           # exists() call.
           return (None, e)

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/04c650f8/sdks/python/apache_beam/io/gcsio_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcsio_test.py b/sdks/python/apache_beam/io/gcsio_test.py
index b909fdd..99c99b3 100644
--- a/sdks/python/apache_beam/io/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcsio_test.py
@@ -29,6 +29,7 @@ from apitools.base.py.exceptions import HttpError
 from apache_beam.internal.clients import storage
 
 from apache_beam.io import gcsio
+from mock import patch
 
 
 class FakeGcsClient(object):
@@ -197,6 +198,18 @@ class TestGCSIO(unittest.TestCase):
     self.assertFalse(self.gcs.exists(file_name + 'xyz'))
     self.assertTrue(self.gcs.exists(file_name))
 
+  @patch.object(FakeGcsObjects, 'Get')
+  def test_exists_failure(self, mock_get):
+    # Raising an error other than 404. Raising 404 is a valid failure for
+    # exists() call.
+    mock_get.side_effect = HttpError({'status':400}, None, None)
+    file_name = 'gs://gcsio-test/dummy_file'
+    file_size = 1234
+    self._insert_random_file(self.client, file_name, file_size)
+    with self.assertRaises(HttpError) as cm:
+      self.gcs.exists(file_name)
+    self.assertEquals(400, cm.exception.status_code)
+
   def test_size(self):
     file_name = 'gs://gcsio-test/dummy_file'
     file_size = 1234


[3/3] incubator-beam git commit: This closes #779

Posted by ke...@apache.org.
This closes #779


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/450b6475
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/450b6475
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/450b6475

Branch: refs/heads/python-sdk
Commit: 450b6475412003aec0b82dc62373e15397106fb0
Parents: 127e763 04c650f
Author: Kenneth Knowles <kl...@google.com>
Authored: Wed Aug 10 13:48:50 2016 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Wed Aug 10 13:48:50 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/fileio.py     | 13 ++++++++++---
 sdks/python/apache_beam/io/gcsio.py      |  9 +++++++--
 sdks/python/apache_beam/io/gcsio_test.py | 23 ++++++++++++++++++++++-
 3 files changed, 39 insertions(+), 6 deletions(-)
----------------------------------------------------------------------



[2/3] incubator-beam git commit: Fixes GcsIO.exists() to properly handle files that do not exist.

Posted by ke...@apache.org.
Fixes GcsIO.exists() to properly handle files that do not exist.

Currently this invocation raises an error for files that do not exist instead of returning False.

Updates FileSink.finalize_write() so that we catch and log any transient errors thrown by the channel_factory.exists() call.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/d8a76a53
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/d8a76a53
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/d8a76a53

Branch: refs/heads/python-sdk
Commit: d8a76a53cf60209d13f929ddaec87cd95f6a040a
Parents: d72ffb0
Author: Chamikara Jayalath <ch...@google.com>
Authored: Wed Aug 3 18:25:41 2016 -0700
Committer: Chamikara Jayalath <ch...@google.com>
Committed: Mon Aug 8 16:35:25 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/fileio.py     | 13 ++++++++++---
 sdks/python/apache_beam/io/gcsio.py      |  9 +++++++--
 sdks/python/apache_beam/io/gcsio_test.py | 10 +++++++++-
 3 files changed, 26 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d8a76a53/sdks/python/apache_beam/io/fileio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py
index b1e091b..ecacb9f 100644
--- a/sdks/python/apache_beam/io/fileio.py
+++ b/sdks/python/apache_beam/io/fileio.py
@@ -530,15 +530,22 @@ class FileSink(iobase.Sink):
         channel_factory.rename(old_name, final_name)
       except IOError as e:
         # May have already been copied.
-        exists = channel_factory.exists(final_name)
+        try:
+          exists = channel_factory.exists(final_name)
+        except Exception as exists_e:  # pylint: disable=broad-except
+          logging.warning('Exception when invoking channel_factory.exists(): '
+                          '%s', exists_e)
+          # Returning original exception after logging the exception from
+          # exists() call.
+          return (None, e)
         if not exists:
           logging.warning(('IOError in _rename_file. old_name: %s, '
                            'final_name: %s, err: %s'), old_name, final_name, e)
-          return(None, e)
+          return (None, e)
       except Exception as e:  # pylint: disable=broad-except
         logging.warning(('Exception in _rename_file. old_name: %s, '
                          'final_name: %s, err: %s'), old_name, final_name, e)
-        return(None, e)
+        return (None, e)
       return (final_name, None)
 
     # ThreadPool crashes in old versions of Python (< 2.7.5) if created from a

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d8a76a53/sdks/python/apache_beam/io/gcsio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcsio.py b/sdks/python/apache_beam/io/gcsio.py
index 88fcfb8..7bb532c 100644
--- a/sdks/python/apache_beam/io/gcsio.py
+++ b/sdks/python/apache_beam/io/gcsio.py
@@ -234,8 +234,13 @@ class GcsIO(object):
                                                  object=object_path)
       self.client.objects.Get(request)  # metadata
       return True
-    except IOError:
-      return False
+    except HttpError as http_error:
+      if http_error.status_code == 404:
+        # HTTP 404 indicates that the file did not exist
+        return False
+      else:
+        # We re-raise all other exceptions
+        raise
 
   @retry.with_exponential_backoff(
       retry_filter=retry.retry_on_server_errors_and_timeout_filter)

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d8a76a53/sdks/python/apache_beam/io/gcsio_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcsio_test.py b/sdks/python/apache_beam/io/gcsio_test.py
index 7b15ef3..b909fdd 100644
--- a/sdks/python/apache_beam/io/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcsio_test.py
@@ -79,7 +79,8 @@ class FakeGcsObjects(object):
   def Get(self, get_request, download=None):  # pylint: disable=invalid-name
     f = self.get_file(get_request.bucket, get_request.object)
     if f is None:
-      raise ValueError('Specified object does not exist.')
+      # Failing with a HTTP 404 if file does not exist.
+      raise HttpError({'status':404}, None, None)
     if download is None:
       return f.get_metadata()
     else:
@@ -189,6 +190,13 @@ class TestGCSIO(unittest.TestCase):
     self.client = FakeGcsClient()
     self.gcs = gcsio.GcsIO(self.client)
 
+  def test_exists(self):
+    file_name = 'gs://gcsio-test/dummy_file'
+    file_size = 1234
+    self._insert_random_file(self.client, file_name, file_size)
+    self.assertFalse(self.gcs.exists(file_name + 'xyz'))
+    self.assertTrue(self.gcs.exists(file_name))
+
   def test_size(self):
     file_name = 'gs://gcsio-test/dummy_file'
     file_size = 1234