You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2017/06/06 20:55:37 UTC
[1/2] beam git commit: Migrate Python tests to not depend on fixed
sharding for file output
Repository: beam
Updated Branches:
refs/heads/master 513c952fa -> e3139a38f
Migrate Python tests to not depend on fixed sharding for file output
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b5c257d5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b5c257d5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b5c257d5
Branch: refs/heads/master
Commit: b5c257d5fa2e3445a37a8154bde706392c23c305
Parents: 513c952
Author: Charles Chen <cc...@google.com>
Authored: Mon Jun 5 16:31:13 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Jun 6 13:55:13 2017 -0700
----------------------------------------------------------------------
.../complete/juliaset/juliaset/juliaset_test.py | 5 +++--
.../apache_beam/examples/complete/tfidf_test.py | 5 +++--
.../examples/cookbook/group_with_coder_test.py | 5 +++--
.../examples/cookbook/mergecontacts_test.py | 3 ++-
.../examples/cookbook/multiple_output_pardo_test.py | 11 ++++++-----
.../examples/wordcount_debugging_test.py | 3 ++-
.../apache_beam/examples/wordcount_minimal_test.py | 3 ++-
sdks/python/apache_beam/examples/wordcount_test.py | 3 ++-
sdks/python/apache_beam/testing/util.py | 16 ++++++++++++++++
9 files changed, 39 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
index 17d9cf3..91c75aa 100644
--- a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
+++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
@@ -25,6 +25,7 @@ import unittest
from apache_beam.examples.complete.juliaset.juliaset import juliaset
+from apache_beam.testing.util import open_shards
class JuliaSetTest(unittest.TestCase):
@@ -60,8 +61,8 @@ class JuliaSetTest(unittest.TestCase):
# Parse the results from the file, and ensure it was written in the proper
# format.
- with open(self.test_files['output_coord_file_name'] +
- '-00000-of-00001') as result_file:
+ with open_shards(self.test_files['output_coord_file_name'] +
+ '-*-of-*') as result_file:
output_lines = result_file.readlines()
# Should have a line for each x-coordinate.
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/complete/tfidf_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/complete/tfidf_test.py b/sdks/python/apache_beam/examples/complete/tfidf_test.py
index 322426f..b6f8825 100644
--- a/sdks/python/apache_beam/examples/complete/tfidf_test.py
+++ b/sdks/python/apache_beam/examples/complete/tfidf_test.py
@@ -28,6 +28,7 @@ from apache_beam.examples.complete import tfidf
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
+from apache_beam.testing.util import open_shards
EXPECTED_RESULTS = set([
@@ -76,8 +77,8 @@ class TfIdfTest(unittest.TestCase):
'--output', os.path.join(temp_folder, 'result')])
# Parse result file and compare.
results = []
- with open(os.path.join(temp_folder,
- 'result-00000-of-00001')) as result_file:
+ with open_shards(os.path.join(
+ temp_folder, 'result-*-of-*')) as result_file:
for line in result_file:
match = re.search(EXPECTED_LINE_RE, line)
logging.info('Result line: %s', line)
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py b/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
index 268ba8d..fb630ba 100644
--- a/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
@@ -22,6 +22,7 @@ import tempfile
import unittest
from apache_beam.examples.cookbook import group_with_coder
+from apache_beam.testing.util import open_shards
# Patch group_with_coder.PlayerCoder.decode(). To test that the PlayerCoder was
@@ -53,7 +54,7 @@ class GroupWithCoderTest(unittest.TestCase):
'--output=%s.result' % temp_path])
# Parse result file and compare.
results = []
- with open(temp_path + '.result-00000-of-00001') as result_file:
+ with open_shards(temp_path + '.result-*-of-*') as result_file:
for line in result_file:
name, points = line.split(',')
results.append((name, int(points)))
@@ -74,7 +75,7 @@ class GroupWithCoderTest(unittest.TestCase):
'--output=%s.result' % temp_path])
# Parse result file and compare.
results = []
- with open(temp_path + '.result-00000-of-00001') as result_file:
+ with open_shards(temp_path + '.result-*-of-*') as result_file:
for line in result_file:
name, points = line.split(',')
results.append((name, int(points)))
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py b/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
index b3be0dd..32a3d51 100644
--- a/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
@@ -22,6 +22,7 @@ import tempfile
import unittest
from apache_beam.examples.cookbook import mergecontacts
+from apache_beam.testing.util import open_shards
class MergeContactsTest(unittest.TestCase):
@@ -114,7 +115,7 @@ class MergeContactsTest(unittest.TestCase):
'--output_tsv=%s.tsv' % result_prefix,
'--output_stats=%s.stats' % result_prefix], assert_results=(2, 1, 3))
- with open('%s.tsv-00000-of-00001' % result_prefix) as f:
+ with open_shards('%s.tsv-*-of-*' % result_prefix) as f:
contents = f.read()
self.assertEqual(self.EXPECTED_TSV, self.normalize_tsv_results(contents))
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
index 3ddd668..1051106 100644
--- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
@@ -23,6 +23,7 @@ import tempfile
import unittest
from apache_beam.examples.cookbook import multiple_output_pardo
+from apache_beam.testing.util import open_shards
class MultipleOutputParDo(unittest.TestCase):
@@ -37,9 +38,9 @@ class MultipleOutputParDo(unittest.TestCase):
f.write(contents)
return f.name
- def get_wordcount_results(self, temp_path):
+ def get_wordcount_results(self, result_path):
results = []
- with open(temp_path) as result_file:
+ with open_shards(result_path) as result_file:
for line in result_file:
match = re.search(r'([A-Za-z]+): ([0-9]+)', line)
if match is not None:
@@ -55,15 +56,15 @@ class MultipleOutputParDo(unittest.TestCase):
'--output=%s' % result_prefix])
expected_char_count = len(''.join(self.SAMPLE_TEXT.split('\n')))
- with open(result_prefix + '-chars-00000-of-00001') as f:
+ with open_shards(result_prefix + '-chars-*-of-*') as f:
contents = f.read()
self.assertEqual(expected_char_count, int(contents))
short_words = self.get_wordcount_results(
- result_prefix + '-short-words-00000-of-00001')
+ result_prefix + '-short-words-*-of-*')
self.assertEqual(sorted(short_words), sorted(self.EXPECTED_SHORT_WORDS))
- words = self.get_wordcount_results(result_prefix + '-words-00000-of-00001')
+ words = self.get_wordcount_results(result_prefix + '-words-*-of-*')
self.assertEqual(sorted(words), sorted(self.EXPECTED_WORDS))
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/wordcount_debugging_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/wordcount_debugging_test.py b/sdks/python/apache_beam/examples/wordcount_debugging_test.py
index 900a8e7..92ee240 100644
--- a/sdks/python/apache_beam/examples/wordcount_debugging_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_debugging_test.py
@@ -23,6 +23,7 @@ import tempfile
import unittest
from apache_beam.examples import wordcount_debugging
+from apache_beam.testing.util import open_shards
class WordCountTest(unittest.TestCase):
@@ -36,7 +37,7 @@ class WordCountTest(unittest.TestCase):
def get_results(self, temp_path):
results = []
- with open(temp_path + '.result-00000-of-00001') as result_file:
+ with open_shards(temp_path + '.result-*-of-*') as result_file:
for line in result_file:
match = re.search(r'([A-Za-z]+): ([0-9]+)', line)
if match is not None:
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/wordcount_minimal_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/wordcount_minimal_test.py b/sdks/python/apache_beam/examples/wordcount_minimal_test.py
index 82bace4..5ee7b78 100644
--- a/sdks/python/apache_beam/examples/wordcount_minimal_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_minimal_test.py
@@ -24,6 +24,7 @@ import tempfile
import unittest
from apache_beam.examples import wordcount_minimal
+from apache_beam.testing.util import open_shards
class WordCountMinimalTest(unittest.TestCase):
@@ -46,7 +47,7 @@ class WordCountMinimalTest(unittest.TestCase):
'--output=%s.result' % temp_path])
# Parse result file and compare.
results = []
- with open(temp_path + '.result-00000-of-00001') as result_file:
+ with open_shards(temp_path + '.result-*-of-*') as result_file:
for line in result_file:
match = re.search(r'([a-z]+): ([0-9]+)', line)
if match is not None:
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/examples/wordcount_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/wordcount_test.py b/sdks/python/apache_beam/examples/wordcount_test.py
index 616540b..9834ba5 100644
--- a/sdks/python/apache_beam/examples/wordcount_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_test.py
@@ -24,6 +24,7 @@ import tempfile
import unittest
from apache_beam.examples import wordcount
+from apache_beam.testing.util import open_shards
class WordCountTest(unittest.TestCase):
@@ -45,7 +46,7 @@ class WordCountTest(unittest.TestCase):
'--output=%s.result' % temp_path])
# Parse result file and compare.
results = []
- with open(temp_path + '.result-00000-of-00001') as result_file:
+ with open_shards(temp_path + '.result-*-of-*') as result_file:
for line in result_file:
match = re.search(r'([a-z]+): ([0-9]+)', line)
if match is not None:
http://git-wip-us.apache.org/repos/asf/beam/blob/b5c257d5/sdks/python/apache_beam/testing/util.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/testing/util.py b/sdks/python/apache_beam/testing/util.py
index 60a6b21..959f25f 100644
--- a/sdks/python/apache_beam/testing/util.py
+++ b/sdks/python/apache_beam/testing/util.py
@@ -19,6 +19,9 @@
from __future__ import absolute_import
+import glob
+import tempfile
+
from apache_beam import pvalue
from apache_beam.transforms import window
from apache_beam.transforms.core import Create
@@ -26,12 +29,15 @@ from apache_beam.transforms.core import Map
from apache_beam.transforms.core import WindowInto
from apache_beam.transforms.util import CoGroupByKey
from apache_beam.transforms.ptransform import PTransform
+from apache_beam.utils.annotations import experimental
__all__ = [
'assert_that',
'equal_to',
'is_empty',
+ # open_shards is internal and has no backwards compatibility guarantees.
+ 'open_shards',
]
@@ -105,3 +111,13 @@ def assert_that(actual, matcher, label='assert_that'):
return label
actual | AssertThat() # pylint: disable=expression-not-assigned
+
+
+@experimental()
+def open_shards(glob_pattern):
+ """Returns a composite file of all shards matching the given glob pattern."""
+ with tempfile.NamedTemporaryFile(delete=False) as f:
+ for shard in glob.glob(glob_pattern):
+ f.write(file(shard).read())
+ concatenated_file_name = f.name
+ return file(concatenated_file_name, 'rb')
[2/2] beam git commit: This closes #3299
Posted by al...@apache.org.
This closes #3299
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e3139a38
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e3139a38
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e3139a38
Branch: refs/heads/master
Commit: e3139a38ff533c21fbc70f85eafcb1d68b52a4b0
Parents: 513c952 b5c257d
Author: Ahmet Altay <al...@google.com>
Authored: Tue Jun 6 13:55:25 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Jun 6 13:55:25 2017 -0700
----------------------------------------------------------------------
.../complete/juliaset/juliaset/juliaset_test.py | 5 +++--
.../apache_beam/examples/complete/tfidf_test.py | 5 +++--
.../examples/cookbook/group_with_coder_test.py | 5 +++--
.../examples/cookbook/mergecontacts_test.py | 3 ++-
.../examples/cookbook/multiple_output_pardo_test.py | 11 ++++++-----
.../examples/wordcount_debugging_test.py | 3 ++-
.../apache_beam/examples/wordcount_minimal_test.py | 3 ++-
sdks/python/apache_beam/examples/wordcount_test.py | 3 ++-
sdks/python/apache_beam/testing/util.py | 16 ++++++++++++++++
9 files changed, 39 insertions(+), 15 deletions(-)
----------------------------------------------------------------------