You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/05/13 23:09:07 UTC
[iceberg] branch master updated: Python: Add spellcheck to the CI (#4730)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 24f1ae096 Python: Add spellcheck to the CI (#4730)
24f1ae096 is described below
commit 24f1ae09636dfe655ff26d8850985faebef192bd
Author: Fokko Driesprong <fo...@tabular.io>
AuthorDate: Sat May 14 01:09:03 2022 +0200
Python: Add spellcheck to the CI (#4730)
---
dev/.rat-excludes | 2 +-
python/spellcheck-dictionary.txt | 45 ++++++++++++++++++++++++++++++++
python/src/iceberg/conversions.py | 2 +-
python/src/iceberg/io/pyarrow.py | 2 +-
python/src/iceberg/schema.py | 6 ++---
python/src/iceberg/table/partitioning.py | 2 +-
python/src/iceberg/types.py | 2 +-
python/tests/test_schema.py | 2 +-
python/tox.ini | 3 +++
9 files changed, 57 insertions(+), 9 deletions(-)
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 14d4d287c..7ed3646ea 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -25,4 +25,4 @@ package-list
sitemap.xml
derby.log
.python-version
-.*_index.md
\ No newline at end of file
+.*_index.md
diff --git a/python/spellcheck-dictionary.txt b/python/spellcheck-dictionary.txt
new file mode 100644
index 000000000..2476d5afd
--- /dev/null
+++ b/python/spellcheck-dictionary.txt
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+accessor
+accessors
+Args
+ASF
+BD
+bool
+boolean
+datetime
+disjunction
+dispatchable
+endian
+enum
+FileInfo
+filesystem
+fs
+func
+io
+NativeFile
+nullability
+pragma
+pyarrow
+repr
+schemas
+seekable
+singledispatch
+str
+struct
+StructProtocol
+StructType
+Timestamptz
+Timestamptzs
+unscaled
+URI
\ No newline at end of file
diff --git a/python/src/iceberg/conversions.py b/python/src/iceberg/conversions.py
index 0af73567e..1df594985 100644
--- a/python/src/iceberg/conversions.py
+++ b/python/src/iceberg/conversions.py
@@ -147,7 +147,7 @@ def to_bytes(primitive_type: PrimitiveType, value: Union[bool, bytes, Decimal, f
Args:
primitive_type(PrimitiveType): An implementation of the PrimitiveType base class
value: The value to convert to bytes (The type of this value depends on which dispatched function is
- used--check dispatchable functions for typehints)
+ used--check dispatchable functions for type hints)
"""
raise TypeError(f"scale does not match {primitive_type}")
diff --git a/python/src/iceberg/io/pyarrow.py b/python/src/iceberg/io/pyarrow.py
index ced7001df..6b138e709 100644
--- a/python/src/iceberg/io/pyarrow.py
+++ b/python/src/iceberg/io/pyarrow.py
@@ -54,7 +54,7 @@ class PyArrowFile(InputFile, OutputFile):
"""
def __init__(self, location: str):
- parsed_location = urlparse(location) # Create a ParseResult from the uri
+ parsed_location = urlparse(location) # Create a ParseResult from the URI
if not parsed_location.scheme: # If no scheme, assume the path is to a local file
self._filesystem, self._path = FileSystem.from_uri(os.path.abspath(location))
else:
diff --git a/python/src/iceberg/schema.py b/python/src/iceberg/schema.py
index 36f7e1dcc..f3790036e 100644
--- a/python/src/iceberg/schema.py
+++ b/python/src/iceberg/schema.py
@@ -126,7 +126,7 @@ class Schema:
Args:
name_or_id (str | int): Either a field name or a field ID
- case_sensitive (bool, optional): Whether to peform a case-sensitive lookup using a field name. Defaults to True.
+ case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.
Returns:
NestedField: The matched NestedField
@@ -145,7 +145,7 @@ class Schema:
Args:
name_or_id (str | int): Either a field name or a field ID
- case_sensitive (bool, optional): Whether to peform a case-sensitive lookup using a field name. Defaults to True.
+ case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.
Returns:
NestedField: The type of the matched NestedField
@@ -180,7 +180,7 @@ class Schema:
Args:
names (List[str]): A list of column names
- case_sensitive (bool, optional): Whether to peform a case-sensitive lookup for each column name. Defaults to True.
+ case_sensitive (bool, optional): Whether to perform a case-sensitive lookup for each column name. Defaults to True.
Returns:
Schema: A new schema with pruned columns
diff --git a/python/src/iceberg/table/partitioning.py b/python/src/iceberg/table/partitioning.py
index 4a9faf4a3..d1e9debd4 100644
--- a/python/src/iceberg/table/partitioning.py
+++ b/python/src/iceberg/table/partitioning.py
@@ -24,7 +24,7 @@ class PartitionField:
Attributes:
source_id(int): The source column id of table's schema
- field_id(int): The partition field id across all the table metadata's partition specs
+ field_id(int): The partition field id across all the table partition specs
transform(Transform): The transform used to produce partition values from source column
name(str): The name of this partition field
"""
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
index 0b780119f..58b0d9de5 100644
--- a/python/src/iceberg/types.py
+++ b/python/src/iceberg/types.py
@@ -520,7 +520,7 @@ class UUIDType(PrimitiveType, Singleton):
class BinaryType(PrimitiveType, Singleton):
- """A Binary data type in Iceberg can be represented using an instance of this class. Binarys in
+ """A Binary data type in Iceberg can be represented using an instance of this class. Binaries in
Iceberg are arbitrary-length byte arrays.
Example:
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 7fde7a27d..131974abe 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -228,7 +228,7 @@ def test_schema_find_field_by_id_raise_on_unknown_field(table_schema_simple):
def test_schema_find_field_type_by_id(table_schema_simple):
- """Test retrieving a columns's type using its field ID"""
+ """Test retrieving a column's type using its field ID"""
index = schema.index_by_id(table_schema_simple)
assert index[1] == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False)
assert index[2] == NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True)
diff --git a/python/tox.ini b/python/tox.ini
index 4cc905a82..5a1a0a704 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -53,10 +53,13 @@ deps =
black
isort
autoflake
+ pylint
+ pyenchant
commands =
autoflake -r --check --ignore-init-module-imports --remove-all-unused-imports src tests
isort --profile black --check-only src tests
black --line-length 130 --check --diff src tests
+ pylint --disable all --enable spelling --spelling-dict en_US --spelling-private-dict-file spellcheck-dictionary.txt src tests
[testenv:format]
deps =