You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@beam.apache.org by al...@apache.org on 2019/01/23 18:39:10 UTC

[beam] branch master updated: Python 3 port io.parquetio module

This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 11601b5  Python 3 port io.parquetio module
     new c25403b  Merge pull request #7582 from RobbeSneyders/parquetio
11601b5 is described below

commit 11601b531393853603cd5ee04fe888d3dccd57c7
Author: robbe <ro...@ml6.eu>
AuthorDate: Mon Jan 21 14:14:27 2019 +0100

    Python 3 port io.parquetio module
---
 sdks/python/apache_beam/io/parquetio_test.py    | 30 ++++++++++++++-----------
 sdks/python/apache_beam/io/source_test_utils.py |  7 +++---
 sdks/python/tox.ini                             |  2 +-
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py
index 8f65c34..d7dd842 100644
--- a/sdks/python/apache_beam/io/parquetio_test.py
+++ b/sdks/python/apache_beam/io/parquetio_test.py
@@ -72,29 +72,33 @@ class TestParquet(unittest.TestCase):
 
     self.RECORDS = [{'name': 'Thomas',
                      'favorite_number': 1,
-                     'favorite_color': 'blue'}, {'name': 'Henry',
-                                                 'favorite_number': 3,
-                                                 'favorite_color': 'green'},
+                     'favorite_color': 'blue'},
+                    {'name': 'Henry',
+                     'favorite_number': 3,
+                     'favorite_color': 'green'},
                     {'name': 'Toby',
                      'favorite_number': 7,
-                     'favorite_color': 'brown'}, {'name': 'Gordon',
-                                                  'favorite_number': 4,
-                                                  'favorite_color': 'blue'},
+                     'favorite_color': 'brown'},
+                    {'name': 'Gordon',
+                     'favorite_number': 4,
+                     'favorite_color': 'blue'},
                     {'name': 'Emily',
                      'favorite_number': -1,
-                     'favorite_color': 'Red'}, {'name': 'Percy',
-                                                'favorite_number': 6,
-                                                'favorite_color': 'Green'}]
+                     'favorite_color': 'Red'},
+                    {'name': 'Percy',
+                     'favorite_number': 6,
+                     'favorite_color': 'Green'}]
+
     self.SCHEMA = pa.schema([
-        ('name', pa.binary()),
+        ('name', pa.string()),
         ('favorite_number', pa.int64()),
-        ('favorite_color', pa.binary())
+        ('favorite_color', pa.string())
     ])
 
     self.SCHEMA96 = pa.schema([
-        ('name', pa.binary()),
+        ('name', pa.string()),
         ('favorite_number', pa.timestamp('ns')),
-        ('favorite_color', pa.binary())
+        ('favorite_color', pa.string())
     ])
 
   def tearDown(self):
diff --git a/sdks/python/apache_beam/io/source_test_utils.py b/sdks/python/apache_beam/io/source_test_utils.py
index f60fafb..d90d245 100644
--- a/sdks/python/apache_beam/io/source_test_utils.py
+++ b/sdks/python/apache_beam/io/source_test_utils.py
@@ -56,6 +56,7 @@ from collections import namedtuple
 from multiprocessing.pool import ThreadPool
 
 from apache_beam.io import iobase
+from apache_beam.testing.util import equal_to
 
 __all__ = ['read_from_source',
            'assert_sources_equal_reference_source',
@@ -174,7 +175,7 @@ def assert_sources_equal_reference_source(reference_source_info, sources_info):
         'list of sources. Number of records were %d and %d instead.'
         % (len(reference_records), len(source_records)))
 
-  if sorted(reference_records) != sorted(source_records):
+  if equal_to(reference_records)(source_records):
     raise ValueError(
         'Reference source and provided list of sources must produce the '
         'same set of records.')
@@ -224,13 +225,13 @@ def assert_reentrant_reads_succeed(source_info):
     for val in read_iter:
       original_read.append(val)
 
-    if sorted(original_read) != sorted(expected_values):
+    if equal_to(original_read)(expected_values):
       raise ValueError('Source did not produce expected values when '
                        'performing a reentrant read after reading %d values. '
                        'Expected %r received %r.'
                        % (i, expected_values, original_read))
 
-    if sorted(reentrant_read) != sorted(expected_values):
+    if equal_to(reentrant_read)(expected_values):
       raise ValueError('A reentrant read of source after reading %d values '
                        'did not produce expected values. Expected %r '
                        'received %r.'
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 2ffca7b..152226f 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -58,7 +58,7 @@ setenv =
   BEAM_EXPERIMENTAL_PY3=1
   RUN_SKIPPED_PY3_TESTS=0
 modules =
-  apache_beam.typehints,apache_beam.coders,apache_beam.options,apache_beam.tools,apache_beam.utils,apache_beam.internal,apache_beam.metrics,apache_beam.portability,apache_beam.pipeline_test,apache_beam.pvalue_test,apache_beam.runners,apache_beam.io.hadoopfilesystem_test,apache_beam.io.hdfs_integration_test,apache_beam.io.gcp.tests.utils_test,apache_beam.io.gcp.big_query_query_to_table_it_test,apache_beam.io.gcp.bigquery_io_read_it_test,apache_beam.io.gcp.bigquery_test,apache_beam.io.gcp. [...]
+  apache_beam.typehints,apache_beam.coders,apache_beam.options,apache_beam.tools,apache_beam.utils,apache_beam.internal,apache_beam.metrics,apache_beam.portability,apache_beam.pipeline_test,apache_beam.pvalue_test,apache_beam.runners,apache_beam.io.hadoopfilesystem_test,apache_beam.io.hdfs_integration_test,apache_beam.io.gcp.tests.utils_test,apache_beam.io.gcp.big_query_query_to_table_it_test,apache_beam.io.gcp.bigquery_io_read_it_test,apache_beam.io.gcp.bigquery_test,apache_beam.io.gcp. [...]
 commands =
   python --version
   pip --version