You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/05/13 23:09:07 UTC

[iceberg] branch master updated: Python: Add spellcheck to the CI (#4730)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 24f1ae096 Python: Add spellcheck to the CI (#4730)
24f1ae096 is described below

commit 24f1ae09636dfe655ff26d8850985faebef192bd
Author: Fokko Driesprong <fo...@tabular.io>
AuthorDate: Sat May 14 01:09:03 2022 +0200

    Python: Add spellcheck to the CI (#4730)
---
 dev/.rat-excludes                        |  2 +-
 python/spellcheck-dictionary.txt         | 45 ++++++++++++++++++++++++++++++++
 python/src/iceberg/conversions.py        |  2 +-
 python/src/iceberg/io/pyarrow.py         |  2 +-
 python/src/iceberg/schema.py             |  6 ++---
 python/src/iceberg/table/partitioning.py |  2 +-
 python/src/iceberg/types.py              |  2 +-
 python/tests/test_schema.py              |  2 +-
 python/tox.ini                           |  3 +++
 9 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 14d4d287c..7ed3646ea 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -25,4 +25,4 @@ package-list
 sitemap.xml
 derby.log
 .python-version
-.*_index.md
\ No newline at end of file
+.*_index.md
diff --git a/python/spellcheck-dictionary.txt b/python/spellcheck-dictionary.txt
new file mode 100644
index 000000000..2476d5afd
--- /dev/null
+++ b/python/spellcheck-dictionary.txt
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+accessor
+accessors
+Args
+ASF
+BD
+bool
+boolean
+datetime
+disjunction
+dispatchable
+endian
+enum
+FileInfo
+filesystem
+fs
+func
+io
+NativeFile
+nullability
+pragma
+pyarrow
+repr
+schemas
+seekable
+singledispatch
+str
+struct
+StructProtocol
+StructType
+Timestamptz
+Timestamptzs
+unscaled
+URI
\ No newline at end of file
diff --git a/python/src/iceberg/conversions.py b/python/src/iceberg/conversions.py
index 0af73567e..1df594985 100644
--- a/python/src/iceberg/conversions.py
+++ b/python/src/iceberg/conversions.py
@@ -147,7 +147,7 @@ def to_bytes(primitive_type: PrimitiveType, value: Union[bool, bytes, Decimal, f
     Args:
         primitive_type(PrimitiveType): An implementation of the PrimitiveType base class
         value: The value to convert to bytes (The type of this value depends on which dispatched function is
-            used--check dispatchable functions for typehints)
+            used--check dispatchable functions for type hints)
     """
     raise TypeError(f"scale does not match {primitive_type}")
 
diff --git a/python/src/iceberg/io/pyarrow.py b/python/src/iceberg/io/pyarrow.py
index ced7001df..6b138e709 100644
--- a/python/src/iceberg/io/pyarrow.py
+++ b/python/src/iceberg/io/pyarrow.py
@@ -54,7 +54,7 @@ class PyArrowFile(InputFile, OutputFile):
     """
 
     def __init__(self, location: str):
-        parsed_location = urlparse(location)  # Create a ParseResult from the uri
+        parsed_location = urlparse(location)  # Create a ParseResult from the URI
         if not parsed_location.scheme:  # If no scheme, assume the path is to a local file
             self._filesystem, self._path = FileSystem.from_uri(os.path.abspath(location))
         else:
diff --git a/python/src/iceberg/schema.py b/python/src/iceberg/schema.py
index 36f7e1dcc..f3790036e 100644
--- a/python/src/iceberg/schema.py
+++ b/python/src/iceberg/schema.py
@@ -126,7 +126,7 @@ class Schema:
 
         Args:
             name_or_id (str | int): Either a field name or a field ID
-            case_sensitive (bool, optional): Whether to peform a case-sensitive lookup using a field name. Defaults to True.
+            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.
 
         Returns:
             NestedField: The matched NestedField
@@ -145,7 +145,7 @@ class Schema:
 
         Args:
             name_or_id (str | int): Either a field name or a field ID
-            case_sensitive (bool, optional): Whether to peform a case-sensitive lookup using a field name. Defaults to True.
+            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.
 
         Returns:
             NestedField: The type of the matched NestedField
@@ -180,7 +180,7 @@ class Schema:
 
         Args:
             names (List[str]): A list of column names
-            case_sensitive (bool, optional): Whether to peform a case-sensitive lookup for each column name. Defaults to True.
+            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup for each column name. Defaults to True.
 
         Returns:
             Schema: A new schema with pruned columns
diff --git a/python/src/iceberg/table/partitioning.py b/python/src/iceberg/table/partitioning.py
index 4a9faf4a3..d1e9debd4 100644
--- a/python/src/iceberg/table/partitioning.py
+++ b/python/src/iceberg/table/partitioning.py
@@ -24,7 +24,7 @@ class PartitionField:
 
     Attributes:
         source_id(int): The source column id of table's schema
-        field_id(int): The partition field id across all the table metadata's partition specs
+        field_id(int): The partition field id across all the table partition specs
         transform(Transform): The transform used to produce partition values from source column
         name(str): The name of this partition field
     """
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
index 0b780119f..58b0d9de5 100644
--- a/python/src/iceberg/types.py
+++ b/python/src/iceberg/types.py
@@ -520,7 +520,7 @@ class UUIDType(PrimitiveType, Singleton):
 
 
 class BinaryType(PrimitiveType, Singleton):
-    """A Binary data type in Iceberg can be represented using an instance of this class. Binarys in
+    """A Binary data type in Iceberg can be represented using an instance of this class. Binaries in
     Iceberg are arbitrary-length byte arrays.
 
     Example:
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 7fde7a27d..131974abe 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -228,7 +228,7 @@ def test_schema_find_field_by_id_raise_on_unknown_field(table_schema_simple):
 
 
 def test_schema_find_field_type_by_id(table_schema_simple):
-    """Test retrieving a columns's type using its field ID"""
+    """Test retrieving a column's type using its field ID"""
     index = schema.index_by_id(table_schema_simple)
     assert index[1] == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False)
     assert index[2] == NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True)
diff --git a/python/tox.ini b/python/tox.ini
index 4cc905a82..5a1a0a704 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -53,10 +53,13 @@ deps =
     black
     isort
     autoflake
+    pylint
+    pyenchant
 commands =
     autoflake -r --check --ignore-init-module-imports --remove-all-unused-imports src tests
     isort --profile black --check-only src tests
     black --line-length 130 --check --diff src tests
+    pylint --disable all --enable spelling --spelling-dict en_US --spelling-private-dict-file spellcheck-dictionary.txt src tests
 
 [testenv:format]
 deps =