You are viewing a plain-text version of this content. The canonical link to the original was not preserved in this copy.
Posted to commits@beam.apache.org by al...@apache.org on 2019/01/23 18:39:10 UTC
[beam] branch master updated: Python 3 port io.parquetio module
This is an automated email from the ASF dual-hosted git repository.
altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 11601b5 Python 3 port io.parquetio module
new c25403b Merge pull request #7582 from RobbeSneyders/parquetio
11601b5 is described below
commit 11601b531393853603cd5ee04fe888d3dccd57c7
Author: robbe <ro...@ml6.eu>
AuthorDate: Mon Jan 21 14:14:27 2019 +0100
Python 3 port io.parquetio module
---
sdks/python/apache_beam/io/parquetio_test.py | 30 ++++++++++++++-----------
sdks/python/apache_beam/io/source_test_utils.py | 7 +++---
sdks/python/tox.ini | 2 +-
3 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py
index 8f65c34..d7dd842 100644
--- a/sdks/python/apache_beam/io/parquetio_test.py
+++ b/sdks/python/apache_beam/io/parquetio_test.py
@@ -72,29 +72,33 @@ class TestParquet(unittest.TestCase):
self.RECORDS = [{'name': 'Thomas',
'favorite_number': 1,
- 'favorite_color': 'blue'}, {'name': 'Henry',
- 'favorite_number': 3,
- 'favorite_color': 'green'},
+ 'favorite_color': 'blue'},
+ {'name': 'Henry',
+ 'favorite_number': 3,
+ 'favorite_color': 'green'},
{'name': 'Toby',
'favorite_number': 7,
- 'favorite_color': 'brown'}, {'name': 'Gordon',
- 'favorite_number': 4,
- 'favorite_color': 'blue'},
+ 'favorite_color': 'brown'},
+ {'name': 'Gordon',
+ 'favorite_number': 4,
+ 'favorite_color': 'blue'},
{'name': 'Emily',
'favorite_number': -1,
- 'favorite_color': 'Red'}, {'name': 'Percy',
- 'favorite_number': 6,
- 'favorite_color': 'Green'}]
+ 'favorite_color': 'Red'},
+ {'name': 'Percy',
+ 'favorite_number': 6,
+ 'favorite_color': 'Green'}]
+
self.SCHEMA = pa.schema([
- ('name', pa.binary()),
+ ('name', pa.string()),
('favorite_number', pa.int64()),
- ('favorite_color', pa.binary())
+ ('favorite_color', pa.string())
])
self.SCHEMA96 = pa.schema([
- ('name', pa.binary()),
+ ('name', pa.string()),
('favorite_number', pa.timestamp('ns')),
- ('favorite_color', pa.binary())
+ ('favorite_color', pa.string())
])
def tearDown(self):
diff --git a/sdks/python/apache_beam/io/source_test_utils.py b/sdks/python/apache_beam/io/source_test_utils.py
index f60fafb..d90d245 100644
--- a/sdks/python/apache_beam/io/source_test_utils.py
+++ b/sdks/python/apache_beam/io/source_test_utils.py
@@ -56,6 +56,7 @@ from collections import namedtuple
from multiprocessing.pool import ThreadPool
from apache_beam.io import iobase
+from apache_beam.testing.util import equal_to
__all__ = ['read_from_source',
'assert_sources_equal_reference_source',
@@ -174,7 +175,7 @@ def assert_sources_equal_reference_source(reference_source_info, sources_info):
'list of sources. Number of records were %d and %d instead.'
% (len(reference_records), len(source_records)))
- if sorted(reference_records) != sorted(source_records):
+ if equal_to(reference_records)(source_records):
raise ValueError(
'Reference source and provided list of sources must produce the '
'same set of records.')
@@ -224,13 +225,13 @@ def assert_reentrant_reads_succeed(source_info):
for val in read_iter:
original_read.append(val)
- if sorted(original_read) != sorted(expected_values):
+ if equal_to(original_read)(expected_values):
raise ValueError('Source did not produce expected values when '
'performing a reentrant read after reading %d values. '
'Expected %r received %r.'
% (i, expected_values, original_read))
- if sorted(reentrant_read) != sorted(expected_values):
+ if equal_to(reentrant_read)(expected_values):
raise ValueError('A reentrant read of source after reading %d values '
'did not produce expected values. Expected %r '
'received %r.'
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 2ffca7b..152226f 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -58,7 +58,7 @@ setenv =
BEAM_EXPERIMENTAL_PY3=1
RUN_SKIPPED_PY3_TESTS=0
modules =
- apache_beam.typehints,apache_beam.coders,apache_beam.options,apache_beam.tools,apache_beam.utils,apache_beam.internal,apache_beam.metrics,apache_beam.portability,apache_beam.pipeline_test,apache_beam.pvalue_test,apache_beam.runners,apache_beam.io.hadoopfilesystem_test,apache_beam.io.hdfs_integration_test,apache_beam.io.gcp.tests.utils_test,apache_beam.io.gcp.big_query_query_to_table_it_test,apache_beam.io.gcp.bigquery_io_read_it_test,apache_beam.io.gcp.bigquery_test,apache_beam.io.gcp. [...]
+ apache_beam.typehints,apache_beam.coders,apache_beam.options,apache_beam.tools,apache_beam.utils,apache_beam.internal,apache_beam.metrics,apache_beam.portability,apache_beam.pipeline_test,apache_beam.pvalue_test,apache_beam.runners,apache_beam.io.hadoopfilesystem_test,apache_beam.io.hdfs_integration_test,apache_beam.io.gcp.tests.utils_test,apache_beam.io.gcp.big_query_query_to_table_it_test,apache_beam.io.gcp.bigquery_io_read_it_test,apache_beam.io.gcp.bigquery_test,apache_beam.io.gcp. [...]
commands =
python --version
pip --version