You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/10/31 19:00:10 UTC

[iceberg] branch master updated: Python: Add black, isort, and tox -e format command (#3383)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 16ea3b5  Python: Add black, isort, and tox -e format command (#3383)
16ea3b5 is described below

commit 16ea3b5c3746811bdd5fddb046ebecb1102f0271
Author: Samuel Redai <43...@users.noreply.github.com>
AuthorDate: Sun Oct 31 12:00:00 2021 -0700

    Python: Add black, isort, and tox -e format command (#3383)
---
 python/setup.py                         |   4 +-
 python/src/iceberg/types.py             |  46 +++++++++---
 python/src/iceberg/utils/bin_packing.py |   5 +-
 python/tests/test_types.py              | 120 ++++++++++++++++++++++++++------
 python/tests/utils/test_bin_packing.py  |  78 ++++++++++++++++-----
 python/tox.ini                          |  34 ++++++++-
 6 files changed, 231 insertions(+), 56 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 9acba2c..5073c55 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -18,12 +18,12 @@
 from setuptools import setup
 
 setup(
-    name='py-iceberg',
+    name="py-iceberg",
     install_requires=[],
     extras_require={
         "dev": [
             "tox-travis==0.12",
             "pytest",
         ],
-    }
+    },
 )
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
index 2113787..57856f9 100644
--- a/python/src/iceberg/types.py
+++ b/python/src/iceberg/types.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+
 class Type(object):
     def __init__(self, type_string: str, repr_string: str, is_primitive=False):
         self._type_string = type_string
@@ -34,7 +35,9 @@ class Type(object):
 
 class FixedType(Type):
     def __init__(self, length: int):
-        super().__init__(f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True)
+        super().__init__(
+            f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True
+        )
         self._length = length
 
     @property
@@ -44,8 +47,11 @@ class FixedType(Type):
 
 class DecimalType(Type):
     def __init__(self, precision: int, scale: int):
-        super().__init__(f"decimal({precision}, {scale})",
-                         f"DecimalType(precision={precision}, scale={scale})", is_primitive=True)
+        super().__init__(
+            f"decimal({precision}, {scale})",
+            f"DecimalType(precision={precision}, scale={scale})",
+            is_primitive=True,
+        )
         self._precision = precision
         self._scale = scale
 
@@ -59,7 +65,14 @@ class DecimalType(Type):
 
 
 class NestedField(object):
-    def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc: str = None):
+    def __init__(
+        self,
+        is_optional: bool,
+        field_id: int,
+        name: str,
+        field_type: Type,
+        doc: str = None,
+    ):
         self._is_optional = is_optional
         self._id = field_id
         self._name = name
@@ -87,17 +100,26 @@ class NestedField(object):
         return self._type
 
     def __repr__(self):
-        return (f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
-                f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})")
+        return (
+            f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
+            f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})"
+        )
 
     def __str__(self):
-        return (f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}"
-                "" if self._doc is None else f" ({self._doc})")
+        return (
+            f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}"
+            ""
+            if self._doc is None
+            else f" ({self._doc})"
+        )
 
 
 class StructType(Type):
     def __init__(self, fields: list):
-        super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType(fields={repr(fields)})")
+        super().__init__(
+            f"struct<{', '.join(map(str, fields))}>",
+            f"StructType(fields={repr(fields)})",
+        )
         self._fields = fields
 
     @property
@@ -117,8 +139,10 @@ class ListType(Type):
 
 class MapType(Type):
     def __init__(self, key: NestedField, value: NestedField):
-        super().__init__(f"map<{key.type}, {value.type}>",
-                         f"MapType(key={repr(key)}, value={repr(value)})")
+        super().__init__(
+            f"map<{key.type}, {value.type}>",
+            f"MapType(key={repr(key)}, value={repr(value)})",
+        )
         self._key_field = key
         self._value_field = value
 
diff --git a/python/src/iceberg/utils/bin_packing.py b/python/src/iceberg/utils/bin_packing.py
index 17d4faa..8e32710 100644
--- a/python/src/iceberg/utils/bin_packing.py
+++ b/python/src/iceberg/utils/bin_packing.py
@@ -15,8 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
+
 class PackingIterator:
-    def __init__(self, items, target_weight, lookback, weight_func, largest_bin_first=False):
+    def __init__(
+        self, items, target_weight, lookback, weight_func, largest_bin_first=False
+    ):
         self.items = iter(items)
         self.target_weight = target_weight
         self.lookback = lookback
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index b6d23dc..3b1d834 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -15,15 +15,47 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from iceberg.types import (BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType,
-                           FloatType, IntegerType, ListType, LongType, MapType, NestedField, StringType,
-                           StructType, TimestampType, TimestamptzType, TimeType, UUIDType)
 import pytest
 
+from iceberg.types import (
+    BinaryType,
+    BooleanType,
+    DateType,
+    DecimalType,
+    DoubleType,
+    FixedType,
+    FloatType,
+    IntegerType,
+    ListType,
+    LongType,
+    MapType,
+    NestedField,
+    StringType,
+    StructType,
+    TimestampType,
+    TimestamptzType,
+    TimeType,
+    UUIDType,
+)
 
-@pytest.mark.parametrize("input_type",
-                         [BooleanType, IntegerType, LongType, FloatType, DoubleType, DateType, TimeType,
-                          TimestampType, TimestamptzType, StringType, UUIDType, BinaryType])
+
+@pytest.mark.parametrize(
+    "input_type",
+    [
+        BooleanType,
+        IntegerType,
+        LongType,
+        FloatType,
+        DoubleType,
+        DateType,
+        TimeType,
+        TimestampType,
+        TimestamptzType,
+        StringType,
+        UUIDType,
+        BinaryType,
+    ],
+)
 def test_repr_primitive_types(input_type):
     assert input_type == eval(repr(input_type))
 
@@ -40,25 +72,47 @@ def test_decimal_type():
     type_var = DecimalType(precision=9, scale=2)
     assert type_var.precision == 9
     assert type_var.scale == 2
-    assert str(type_var) == 'decimal(9, 2)'
+    assert str(type_var) == "decimal(9, 2)"
     assert repr(type_var) == "DecimalType(precision=9, scale=2)"
     assert str(type_var) == str(eval(repr(type_var)))
 
 
 def test_struct_type():
-    type_var = StructType([NestedField(True, 1, "optional_field", IntegerType),
-                           NestedField(False, 2, "required_field", FixedType(5)),
-                           NestedField(False, 3, "required_field", StructType([
-                               NestedField(True, 4, "optional_field", DecimalType(8, 2)),
-                               NestedField(False, 5, "required_field", LongType)]))])
+    type_var = StructType(
+        [
+            NestedField(True, 1, "optional_field", IntegerType),
+            NestedField(False, 2, "required_field", FixedType(5)),
+            NestedField(
+                False,
+                3,
+                "required_field",
+                StructType(
+                    [
+                        NestedField(True, 4, "optional_field", DecimalType(8, 2)),
+                        NestedField(False, 5, "required_field", LongType),
+                    ]
+                ),
+            ),
+        ]
+    )
     assert len(type_var.fields) == 3
     assert str(type_var) == str(eval(repr(type_var)))
 
 
 def test_list_type():
-    type_var = ListType(NestedField(False, 1, "required_field", StructType([
-        NestedField(True, 2, "optional_field", DecimalType(8, 2)),
-        NestedField(False, 3, "required_field", LongType)])))
+    type_var = ListType(
+        NestedField(
+            False,
+            1,
+            "required_field",
+            StructType(
+                [
+                    NestedField(True, 2, "optional_field", DecimalType(8, 2)),
+                    NestedField(False, 3, "required_field", LongType),
+                ]
+            ),
+        )
+    )
     assert isinstance(type_var.element.type, StructType)
     assert len(type_var.element.type.fields) == 2
     assert type_var.element.field_id == 1
@@ -66,8 +120,10 @@ def test_list_type():
 
 
 def test_map_type():
-    type_var = MapType(NestedField(True, 1, "optional_field", DoubleType),
-                       NestedField(False, 2, "required_field", UUIDType))
+    type_var = MapType(
+        NestedField(True, 1, "optional_field", DoubleType),
+        NestedField(False, 2, "required_field", UUIDType),
+    )
     assert type_var.key.type == DoubleType
     assert type_var.key.field_id == 1
     assert type_var.value.type == UUIDType
@@ -76,12 +132,30 @@ def test_map_type():
 
 
 def test_nested_field():
-    field_var = NestedField(True, 1, "optional_field1", StructType([
-        NestedField(True, 2, "optional_field2", ListType(
-            NestedField(False, 3, "required_field3", DoubleType))),
-        NestedField(False, 4, "required_field4", MapType(
-            NestedField(True, 5, "optional_field5", TimeType),
-            NestedField(False, 6, "required_field6", UUIDType)))]))
+    field_var = NestedField(
+        True,
+        1,
+        "optional_field1",
+        StructType(
+            [
+                NestedField(
+                    True,
+                    2,
+                    "optional_field2",
+                    ListType(NestedField(False, 3, "required_field3", DoubleType)),
+                ),
+                NestedField(
+                    False,
+                    4,
+                    "required_field4",
+                    MapType(
+                        NestedField(True, 5, "optional_field5", TimeType),
+                        NestedField(False, 6, "required_field6", UUIDType),
+                    ),
+                ),
+            ]
+        ),
+    )
     assert field_var.is_optional
     assert not field_var.is_required
     assert field_var.field_id == 1
diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py
index 0cf44f1..7a5a074 100644
--- a/python/tests/utils/test_bin_packing.py
+++ b/python/tests/utils/test_bin_packing.py
@@ -17,34 +17,76 @@
 
 import random
 
-from iceberg.utils.bin_packing import PackingIterator
 import pytest
 
+from iceberg.utils.bin_packing import PackingIterator
+
 
-@pytest.mark.parametrize("splits, lookback, split_size, open_cost", [
-    ([random.randint(0, 64) for x in range(200)], 20, 128, 4),  # random splits
-    ([], 20, 128, 4),  # no splits
-    ([0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100, 20, 128, 4)  # sparse
-])
+@pytest.mark.parametrize(
+    "splits, lookback, split_size, open_cost",
+    [
+        ([random.randint(0, 64) for x in range(200)], 20, 128, 4),  # random splits
+        ([], 20, 128, 4),  # no splits
+        (
+            [0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100,
+            20,
+            128,
+            4,
+        ),  # sparse
+    ],
+)
 def test_bin_packing(splits, lookback, split_size, open_cost):
-
     def weight_func(x):
         return max(x, open_cost)
 
-    item_list_sums = [sum(item)
-                      for item in PackingIterator(splits, split_size, lookback, weight_func)]
+    item_list_sums = [
+        sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func)
+    ]
     assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums])
 
 
-@pytest.mark.parametrize("splits, target_weight, lookback, largest_bin_first, expected_lists", [
-    ([36, 36, 36, 36, 73, 110, 128], 128, 2, True, [[110], [128], [36, 73], [36, 36, 36]]),
-    ([36, 36, 36, 36, 73, 110, 128], 128, 2, False, [[36, 36, 36], [36, 73], [110], [128]]),
-    ([64, 64, 128, 32, 32, 32, 32], 128, 1, True, [[64, 64], [128], [32, 32, 32, 32]]),
-    ([64, 64, 128, 32, 32, 32, 32], 128, 1, False, [[64, 64], [128], [32, 32, 32, 32]]),
-])
-def test_bin_packing_lookback(splits, target_weight, lookback, largest_bin_first, expected_lists):
+@pytest.mark.parametrize(
+    "splits, target_weight, lookback, largest_bin_first, expected_lists",
+    [
+        (
+            [36, 36, 36, 36, 73, 110, 128],
+            128,
+            2,
+            True,
+            [[110], [128], [36, 73], [36, 36, 36]],
+        ),
+        (
+            [36, 36, 36, 36, 73, 110, 128],
+            128,
+            2,
+            False,
+            [[36, 36, 36], [36, 73], [110], [128]],
+        ),
+        (
+            [64, 64, 128, 32, 32, 32, 32],
+            128,
+            1,
+            True,
+            [[64, 64], [128], [32, 32, 32, 32]],
+        ),
+        (
+            [64, 64, 128, 32, 32, 32, 32],
+            128,
+            1,
+            False,
+            [[64, 64], [128], [32, 32, 32, 32]],
+        ),
+    ],
+)
+def test_bin_packing_lookback(
+    splits, target_weight, lookback, largest_bin_first, expected_lists
+):
     def weight_func(x):
         return x
 
-    assert [item for item in PackingIterator(
-        splits, target_weight, lookback, weight_func, largest_bin_first)] == expected_lists
+    assert [
+        item
+        for item in PackingIterator(
+            splits, target_weight, lookback, weight_func, largest_bin_first
+        )
+    ] == expected_lists
diff --git a/python/tox.ini b/python/tox.ini
index d1596ea..b099a00 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -34,20 +34,52 @@ commands =
     coverage report -m --fail-under=90
     coverage html -d test-reports/{envname}/coverage-html
     coverage xml -o test-reports/{envname}/coverage.xml
+[testenv:format]
+description = reformat all source code
+basepython = python3
+deps =
+    black
+    isort
+    flake8
+skip_install = true
+commands =
+    isort --project iceberg --profile black setup.py src tests
+    black setup.py src tests
+    flake8 setup.py src tests
 
 [testenv:linters]
 basepython = python3
 skip_install = true
 deps =
     .
+    {[testenv:isort]deps}
+    {[testenv:black]deps}
     {[testenv:flake8]deps}
     {[testenv:bandit]deps}
     {[testenv:mypy]deps}
 commands =
+    {[testenv:isort]deps}
+    {[testenv:black]deps}
     {[testenv:flake8]commands}
     {[testenv:bandit]commands}
     {[testenv:mypy]commands}
 
+[testenv:isort]
+basepython = python3
+skip_install = true
+deps =
+    isort
+commands =
+    isort --recursive --project iceberg --profile black --check-only setup.py src tests
+
+[testenv:black]
+basepython = python3
+skip_install = true
+deps =
+    black
+commands =
+    black --check --diff src setup.py tests
+
 [testenv:flake8]
 basepython = python3
 skip_install = true
@@ -93,7 +125,7 @@ commands =
     python -m http.server {posargs}
 
 [flake8]
-ignore = E501,W503
+ignore = E501,I100,I202,W503
 exclude =
     *.egg-info,
     *.pyc,