You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/10/31 19:00:10 UTC
[iceberg] branch master updated: Python: Add black, isort,
and tox -e format command (#3383)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 16ea3b5 Python: Add black, isort, and tox -e format command (#3383)
16ea3b5 is described below
commit 16ea3b5c3746811bdd5fddb046ebecb1102f0271
Author: Samuel Redai <43...@users.noreply.github.com>
AuthorDate: Sun Oct 31 12:00:00 2021 -0700
Python: Add black, isort, and tox -e format command (#3383)
---
python/setup.py | 4 +-
python/src/iceberg/types.py | 46 +++++++++---
python/src/iceberg/utils/bin_packing.py | 5 +-
python/tests/test_types.py | 120 ++++++++++++++++++++++++++------
python/tests/utils/test_bin_packing.py | 78 ++++++++++++++++-----
python/tox.ini | 34 ++++++++-
6 files changed, 231 insertions(+), 56 deletions(-)
diff --git a/python/setup.py b/python/setup.py
index 9acba2c..5073c55 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -18,12 +18,12 @@
from setuptools import setup
setup(
- name='py-iceberg',
+ name="py-iceberg",
install_requires=[],
extras_require={
"dev": [
"tox-travis==0.12",
"pytest",
],
- }
+ },
)
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
index 2113787..57856f9 100644
--- a/python/src/iceberg/types.py
+++ b/python/src/iceberg/types.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+
class Type(object):
def __init__(self, type_string: str, repr_string: str, is_primitive=False):
self._type_string = type_string
@@ -34,7 +35,9 @@ class Type(object):
class FixedType(Type):
def __init__(self, length: int):
- super().__init__(f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True)
+ super().__init__(
+ f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True
+ )
self._length = length
@property
@@ -44,8 +47,11 @@ class FixedType(Type):
class DecimalType(Type):
def __init__(self, precision: int, scale: int):
- super().__init__(f"decimal({precision}, {scale})",
- f"DecimalType(precision={precision}, scale={scale})", is_primitive=True)
+ super().__init__(
+ f"decimal({precision}, {scale})",
+ f"DecimalType(precision={precision}, scale={scale})",
+ is_primitive=True,
+ )
self._precision = precision
self._scale = scale
@@ -59,7 +65,14 @@ class DecimalType(Type):
class NestedField(object):
- def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc: str = None):
+ def __init__(
+ self,
+ is_optional: bool,
+ field_id: int,
+ name: str,
+ field_type: Type,
+ doc: str = None,
+ ):
self._is_optional = is_optional
self._id = field_id
self._name = name
@@ -87,17 +100,26 @@ class NestedField(object):
return self._type
def __repr__(self):
- return (f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
- f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})")
+ return (
+ f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
+ f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})"
+ )
def __str__(self):
- return (f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}"
- "" if self._doc is None else f" ({self._doc})")
+ return (
+ f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}"
+ ""
+ if self._doc is None
+ else f" ({self._doc})"
+ )
class StructType(Type):
def __init__(self, fields: list):
- super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType(fields={repr(fields)})")
+ super().__init__(
+ f"struct<{', '.join(map(str, fields))}>",
+ f"StructType(fields={repr(fields)})",
+ )
self._fields = fields
@property
@@ -117,8 +139,10 @@ class ListType(Type):
class MapType(Type):
def __init__(self, key: NestedField, value: NestedField):
- super().__init__(f"map<{key.type}, {value.type}>",
- f"MapType(key={repr(key)}, value={repr(value)})")
+ super().__init__(
+ f"map<{key.type}, {value.type}>",
+ f"MapType(key={repr(key)}, value={repr(value)})",
+ )
self._key_field = key
self._value_field = value
diff --git a/python/src/iceberg/utils/bin_packing.py b/python/src/iceberg/utils/bin_packing.py
index 17d4faa..8e32710 100644
--- a/python/src/iceberg/utils/bin_packing.py
+++ b/python/src/iceberg/utils/bin_packing.py
@@ -15,8 +15,11 @@
# specific language governing permissions and limitations
# under the License.
+
class PackingIterator:
- def __init__(self, items, target_weight, lookback, weight_func, largest_bin_first=False):
+ def __init__(
+ self, items, target_weight, lookback, weight_func, largest_bin_first=False
+ ):
self.items = iter(items)
self.target_weight = target_weight
self.lookback = lookback
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index b6d23dc..3b1d834 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -15,15 +15,47 @@
# specific language governing permissions and limitations
# under the License.
-from iceberg.types import (BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType,
- FloatType, IntegerType, ListType, LongType, MapType, NestedField, StringType,
- StructType, TimestampType, TimestamptzType, TimeType, UUIDType)
import pytest
+from iceberg.types import (
+ BinaryType,
+ BooleanType,
+ DateType,
+ DecimalType,
+ DoubleType,
+ FixedType,
+ FloatType,
+ IntegerType,
+ ListType,
+ LongType,
+ MapType,
+ NestedField,
+ StringType,
+ StructType,
+ TimestampType,
+ TimestamptzType,
+ TimeType,
+ UUIDType,
+)
-@pytest.mark.parametrize("input_type",
- [BooleanType, IntegerType, LongType, FloatType, DoubleType, DateType, TimeType,
- TimestampType, TimestamptzType, StringType, UUIDType, BinaryType])
+
+@pytest.mark.parametrize(
+ "input_type",
+ [
+ BooleanType,
+ IntegerType,
+ LongType,
+ FloatType,
+ DoubleType,
+ DateType,
+ TimeType,
+ TimestampType,
+ TimestamptzType,
+ StringType,
+ UUIDType,
+ BinaryType,
+ ],
+)
def test_repr_primitive_types(input_type):
assert input_type == eval(repr(input_type))
@@ -40,25 +72,47 @@ def test_decimal_type():
type_var = DecimalType(precision=9, scale=2)
assert type_var.precision == 9
assert type_var.scale == 2
- assert str(type_var) == 'decimal(9, 2)'
+ assert str(type_var) == "decimal(9, 2)"
assert repr(type_var) == "DecimalType(precision=9, scale=2)"
assert str(type_var) == str(eval(repr(type_var)))
def test_struct_type():
- type_var = StructType([NestedField(True, 1, "optional_field", IntegerType),
- NestedField(False, 2, "required_field", FixedType(5)),
- NestedField(False, 3, "required_field", StructType([
- NestedField(True, 4, "optional_field", DecimalType(8, 2)),
- NestedField(False, 5, "required_field", LongType)]))])
+ type_var = StructType(
+ [
+ NestedField(True, 1, "optional_field", IntegerType),
+ NestedField(False, 2, "required_field", FixedType(5)),
+ NestedField(
+ False,
+ 3,
+ "required_field",
+ StructType(
+ [
+ NestedField(True, 4, "optional_field", DecimalType(8, 2)),
+ NestedField(False, 5, "required_field", LongType),
+ ]
+ ),
+ ),
+ ]
+ )
assert len(type_var.fields) == 3
assert str(type_var) == str(eval(repr(type_var)))
def test_list_type():
- type_var = ListType(NestedField(False, 1, "required_field", StructType([
- NestedField(True, 2, "optional_field", DecimalType(8, 2)),
- NestedField(False, 3, "required_field", LongType)])))
+ type_var = ListType(
+ NestedField(
+ False,
+ 1,
+ "required_field",
+ StructType(
+ [
+ NestedField(True, 2, "optional_field", DecimalType(8, 2)),
+ NestedField(False, 3, "required_field", LongType),
+ ]
+ ),
+ )
+ )
assert isinstance(type_var.element.type, StructType)
assert len(type_var.element.type.fields) == 2
assert type_var.element.field_id == 1
@@ -66,8 +120,10 @@ def test_list_type():
def test_map_type():
- type_var = MapType(NestedField(True, 1, "optional_field", DoubleType),
- NestedField(False, 2, "required_field", UUIDType))
+ type_var = MapType(
+ NestedField(True, 1, "optional_field", DoubleType),
+ NestedField(False, 2, "required_field", UUIDType),
+ )
assert type_var.key.type == DoubleType
assert type_var.key.field_id == 1
assert type_var.value.type == UUIDType
@@ -76,12 +132,30 @@ def test_map_type():
def test_nested_field():
- field_var = NestedField(True, 1, "optional_field1", StructType([
- NestedField(True, 2, "optional_field2", ListType(
- NestedField(False, 3, "required_field3", DoubleType))),
- NestedField(False, 4, "required_field4", MapType(
- NestedField(True, 5, "optional_field5", TimeType),
- NestedField(False, 6, "required_field6", UUIDType)))]))
+ field_var = NestedField(
+ True,
+ 1,
+ "optional_field1",
+ StructType(
+ [
+ NestedField(
+ True,
+ 2,
+ "optional_field2",
+ ListType(NestedField(False, 3, "required_field3", DoubleType)),
+ ),
+ NestedField(
+ False,
+ 4,
+ "required_field4",
+ MapType(
+ NestedField(True, 5, "optional_field5", TimeType),
+ NestedField(False, 6, "required_field6", UUIDType),
+ ),
+ ),
+ ]
+ ),
+ )
assert field_var.is_optional
assert not field_var.is_required
assert field_var.field_id == 1
diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py
index 0cf44f1..7a5a074 100644
--- a/python/tests/utils/test_bin_packing.py
+++ b/python/tests/utils/test_bin_packing.py
@@ -17,34 +17,76 @@
import random
-from iceberg.utils.bin_packing import PackingIterator
import pytest
+from iceberg.utils.bin_packing import PackingIterator
+
-@pytest.mark.parametrize("splits, lookback, split_size, open_cost", [
- ([random.randint(0, 64) for x in range(200)], 20, 128, 4), # random splits
- ([], 20, 128, 4), # no splits
- ([0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100, 20, 128, 4) # sparse
-])
+@pytest.mark.parametrize(
+ "splits, lookback, split_size, open_cost",
+ [
+ ([random.randint(0, 64) for x in range(200)], 20, 128, 4), # random splits
+ ([], 20, 128, 4), # no splits
+ (
+ [0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100,
+ 20,
+ 128,
+ 4,
+ ), # sparse
+ ],
+)
def test_bin_packing(splits, lookback, split_size, open_cost):
-
def weight_func(x):
return max(x, open_cost)
- item_list_sums = [sum(item)
- for item in PackingIterator(splits, split_size, lookback, weight_func)]
+ item_list_sums = [
+ sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func)
+ ]
assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums])
-@pytest.mark.parametrize("splits, target_weight, lookback, largest_bin_first, expected_lists", [
- ([36, 36, 36, 36, 73, 110, 128], 128, 2, True, [[110], [128], [36, 73], [36, 36, 36]]),
- ([36, 36, 36, 36, 73, 110, 128], 128, 2, False, [[36, 36, 36], [36, 73], [110], [128]]),
- ([64, 64, 128, 32, 32, 32, 32], 128, 1, True, [[64, 64], [128], [32, 32, 32, 32]]),
- ([64, 64, 128, 32, 32, 32, 32], 128, 1, False, [[64, 64], [128], [32, 32, 32, 32]]),
-])
-def test_bin_packing_lookback(splits, target_weight, lookback, largest_bin_first, expected_lists):
+@pytest.mark.parametrize(
+ "splits, target_weight, lookback, largest_bin_first, expected_lists",
+ [
+ (
+ [36, 36, 36, 36, 73, 110, 128],
+ 128,
+ 2,
+ True,
+ [[110], [128], [36, 73], [36, 36, 36]],
+ ),
+ (
+ [36, 36, 36, 36, 73, 110, 128],
+ 128,
+ 2,
+ False,
+ [[36, 36, 36], [36, 73], [110], [128]],
+ ),
+ (
+ [64, 64, 128, 32, 32, 32, 32],
+ 128,
+ 1,
+ True,
+ [[64, 64], [128], [32, 32, 32, 32]],
+ ),
+ (
+ [64, 64, 128, 32, 32, 32, 32],
+ 128,
+ 1,
+ False,
+ [[64, 64], [128], [32, 32, 32, 32]],
+ ),
+ ],
+)
+def test_bin_packing_lookback(
+ splits, target_weight, lookback, largest_bin_first, expected_lists
+):
def weight_func(x):
return x
- assert [item for item in PackingIterator(
- splits, target_weight, lookback, weight_func, largest_bin_first)] == expected_lists
+ assert [
+ item
+ for item in PackingIterator(
+ splits, target_weight, lookback, weight_func, largest_bin_first
+ )
+ ] == expected_lists
diff --git a/python/tox.ini b/python/tox.ini
index d1596ea..b099a00 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -34,20 +34,52 @@ commands =
coverage report -m --fail-under=90
coverage html -d test-reports/{envname}/coverage-html
coverage xml -o test-reports/{envname}/coverage.xml
+[testenv:format]
+description = reformat all source code
+basepython = python3
+deps =
+ black
+ isort
+ flake8
+skip_install = true
+commands =
+ isort --project iceberg --profile black setup.py src tests
+ black setup.py src tests
+ flake8 setup.py src tests
[testenv:linters]
basepython = python3
skip_install = true
deps =
.
+ {[testenv:isort]deps}
+ {[testenv:black]deps}
{[testenv:flake8]deps}
{[testenv:bandit]deps}
{[testenv:mypy]deps}
commands =
+ {[testenv:isort]deps}
+ {[testenv:black]deps}
{[testenv:flake8]commands}
{[testenv:bandit]commands}
{[testenv:mypy]commands}
+[testenv:isort]
+basepython = python3
+skip_install = true
+deps =
+ isort
+commands =
+ isort --recursive --project iceberg --profile black --check-only setup.py src tests
+
+[testenv:black]
+basepython = python3
+skip_install = true
+deps =
+ black
+commands =
+ black --check --diff src setup.py tests
+
[testenv:flake8]
basepython = python3
skip_install = true
@@ -93,7 +125,7 @@ commands =
python -m http.server {posargs}
[flake8]
-ignore = E501,W503
+ignore = E501,I100,I202,W503
exclude =
*.egg-info,
*.pyc,