You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2022/09/19 17:07:50 UTC
[beam] branch master updated: Revert "Trying out property-based tests for Beam python coders (#22233)"
This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 2a34d0cfa23 Revert "Trying out property-based tests for Beam python coders (#22233)"
new cb5230ac147 Merge pull request #23265 from pabloem/rv-pbt
2a34d0cfa23 is described below
commit 2a34d0cfa23b4269ef127f8a4b9154611f246e2a
Author: Pablo E <pa...@apache.org>
AuthorDate: Thu Sep 15 14:27:49 2022 -0700
Revert "Trying out property-based tests for Beam python coders (#22233)"
This reverts commit 9be9a43c1b85be9b2f78e2943f6092ccb88e13b4.
---
.../coders/coders_property_based_test.py | 156 ---------------------
sdks/python/setup.py | 1 -
2 files changed, 157 deletions(-)
diff --git a/sdks/python/apache_beam/coders/coders_property_based_test.py b/sdks/python/apache_beam/coders/coders_property_based_test.py
deleted file mode 100644
index 6b073c01380..00000000000
--- a/sdks/python/apache_beam/coders/coders_property_based_test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Property tests for coders in the Python SDK.
-
-The tests in this file utilize the hypothesis library to generate random test
-cases and run them against Beam's coder implementations.
-
-These tests are similar to fuzzing, except they test invariant properties
-of code.
-"""
-
-import math
-import typing
-import unittest
-# TODO(pabloem): Include other categories of characters
-from datetime import datetime
-from string import ascii_letters
-from string import digits
-
-import numpy as np
-from hypothesis import strategies as st
-from hypothesis import assume
-from hypothesis import given
-from hypothesis import settings
-from pytz import utc
-
-from apache_beam.coders import FloatCoder
-from apache_beam.coders import RowCoder
-from apache_beam.coders import StrUtf8Coder
-from apache_beam.coders.typecoders import registry as coders_registry
-from apache_beam.typehints.schemas import PRIMITIVE_TO_ATOMIC_TYPE
-from apache_beam.typehints.schemas import typing_to_runner_api
-from apache_beam.utils.timestamp import Timestamp
-
-SCHEMA_TYPES_TO_STRATEGY = {
- str: st.text(),
- bytes: st.binary(),
- typing.ByteString: st.binary(),
- # Maximum datetime on year 3000 to conform to Windows OS limits.
- Timestamp: st.datetimes(
- min_value=datetime(1970, 1, 1, 1, 1),
- max_value=datetime(
- 3000, 1, 1, 0,
- 0)).map(lambda dt: Timestamp.from_utc_datetime(dt.astimezone(utc))),
- int: st.integers(min_value=-(1 << 63 - 1), max_value=1 << 63 - 1),
- # INT8/BYTE not yet supported by RowCoder.
- # np.int8: st.binary(min_size=1, max_size=1),
- # INT16 not yet supported by RowCoder.
- # np.int16: st.integers(min_value=-(1 << 15 - 1), max_value=1 << 15 - 1),
- np.int32: st.integers(min_value=-(1 << 31 - 1), max_value=1 << 31 - 1),
- np.int64: st.integers(min_value=-(1 << 63 - 1), max_value=1 << 63 - 1),
- np.uint32: st.integers(min_value=0, max_value=1 << 32 - 1),
- np.uint64: st.integers(min_value=0, max_value=1 << 64 - 1),
- np.float32: st.floats(width=32, allow_nan=False),
- np.float64: st.floats(width=64, allow_nan=False),
- float: st.floats(width=64, allow_nan=False),
- bool: st.booleans()
-}
-
-# TODO(https://github.com/apache/beam/issues/23003): Support logical types.
-SCHEMA_TYPES = list(SCHEMA_TYPES_TO_STRATEGY.keys())
-
-# A hypothesis strategy that generates schemas.
-# A schema is a list containing tuples of strings (field names), types (field
-# types) and boolean (nullable or not).
-# This strategy currently generates rows with simple types (i.e. non-list, and
-# non-map fields).
-SCHEMA_GENERATOR_STRATEGY = st.lists(
- st.tuples(
- st.text(ascii_letters + digits + '_', min_size=1),
- st.sampled_from(SCHEMA_TYPES),
- st.booleans()))
-
-TYPES_UNSUPPORTED_BY_ROW_CODER = {np.int8, np.int16}
-
-
-class TypesAreAllTested(unittest.TestCase):
- def test_all_types_are_tested(self):
- # Verify that all types among Beam's defined types are being tested
- self.assertEqual(
- set(SCHEMA_TYPES).intersection(PRIMITIVE_TO_ATOMIC_TYPE.keys()),
- set(PRIMITIVE_TO_ATOMIC_TYPE.keys()).difference(
- TYPES_UNSUPPORTED_BY_ROW_CODER))
-
-
-class ProperyTestingCoders(unittest.TestCase):
- @given(st.text())
- def test_string_coder(self, txt: str):
- coder = StrUtf8Coder()
- self.assertEqual(coder.decode(coder.encode(txt)), txt)
-
- @given(st.floats())
- def test_float_coder(self, num: float):
- coder = FloatCoder()
- test_num = coder.decode(coder.encode(num))
- if math.isnan(num):
- # This special branch is needed because by definition
- # nan != nan.
- self.assertTrue(math.isnan(test_num))
- else:
- self.assertEqual(coder.decode(coder.encode(num)), num)
-
- @settings(deadline=None, print_blob=True)
- @given(st.data())
- def test_row_coder(self, data: st.DataObject):
- """Generate rows and schemas, and test their encoding/decoding.
-
- The schemas are generated based on the SCHEMA_GENERATOR_STRATEGY.
- """
- schema = data.draw(SCHEMA_GENERATOR_STRATEGY)
- # Assume that the cardinality of the set of names is the same
- # as the length of the schema. This means there's no duplicate
- # names for fields.
- # If this condition does not hold, then we must not continue the
- # test.
- assume(len({name for name, _, _ in schema}) == len(schema))
- assume(
- len({n[0]
- for n, _, _ in schema}.intersection(set(digits + '_'))) == 0)
- RowType = typing.NamedTuple( # type: ignore
- 'RandomRowType',
- [(name, type_ if not nullable else typing.Optional[type_]) for name,
- type_,
- nullable in schema])
- coders_registry.register_coder(RowType, RowCoder)
-
- # TODO(https://github.com/apache/beam/issues/23002): Apply nulls for these
- row = RowType( # type: ignore
- **{
- name: data.draw(SCHEMA_TYPES_TO_STRATEGY[type_])
- for name,
- type_,
- nullable in schema
- })
-
- coder = RowCoder(typing_to_runner_api(RowType).row_type.schema)
- self.assertEqual(coder.decode(coder.encode(row)), row)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 7abbf758138..8017c1b692a 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -253,7 +253,6 @@ if __name__ == '__main__':
],
'test': [
'freezegun>=0.3.12',
- 'hypothesis<7',
'joblib>=1.0.1',
'mock>=1.0.1,<3.0.0',
'pandas<2.0.0',