You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/08/07 20:24:33 UTC

[iceberg] branch master updated: Python: Move catalog package base.py classes to __init__.py (#5457)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 102f51ffab Python: Move catalog package base.py classes to __init__.py (#5457)
102f51ffab is described below

commit 102f51ffab3a51be77ecc9bafa68d55efb1136e4
Author: Samuel Redai <43...@users.noreply.github.com>
AuthorDate: Sun Aug 7 16:24:29 2022 -0400

    Python: Move catalog package base.py classes to __init__.py (#5457)
---
 python/pyiceberg/catalog/__init__.py               | 247 ++++++++++++++++++-
 python/pyiceberg/catalog/base.py                   | 261 ---------------------
 python/pyiceberg/catalog/hive.py                   |   8 +-
 python/pyiceberg/catalog/rest.py                   |   8 +-
 python/pyiceberg/table/base.py                     |   2 +-
 .../pyiceberg/{catalog/__init__.py => typedef.py}  |   0
 python/tests/catalog/test_base.py                  |   8 +-
 python/tests/catalog/test_hive.py                  |   2 +-
 python/tests/catalog/test_rest.py                  |   2 +-
 9 files changed, 265 insertions(+), 273 deletions(-)

diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/catalog/__init__.py
index d78cfc86a8..25063189a9 100644
--- a/python/pyiceberg/catalog/__init__.py
+++ b/python/pyiceberg/catalog/__init__.py
@@ -14,7 +14,248 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from typing import Dict, Tuple
 
-Identifier = Tuple[str, ...]
-Properties = Dict[str, str]
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+from pyiceberg.schema import Schema
+from pyiceberg.table.base import Table
+from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
+from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
+from pyiceberg.typedef import Identifier, Properties
+
+
+@dataclass
+class PropertiesUpdateSummary:
+    removed: list[str]
+    updated: list[str]
+    missing: list[str]
+
+
+class Catalog(ABC):
+    """Base Catalog for table operations like - create, drop, load, list and others.
+
+    The catalog table APIs accept a table identifier, which is a fully qualified table name. The identifier can be a string or
+    tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
+
+    The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string
+    or tuple of strings.
+
+    Attributes:
+        name (str): Name of the catalog
+        properties (Properties): Catalog properties
+    """
+
+    def __init__(self, name: str, properties: Properties):
+        self._name = name
+        self._properties = properties
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def properties(self) -> Properties:
+        return self._properties
+
+    @abstractmethod
+    def create_table(
+        self,
+        identifier: str | Identifier,
+        schema: Schema,
+        location: str | None = None,
+        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
+        sort_order: SortOrder = UNSORTED_SORT_ORDER,
+        properties: Properties | None = None,
+    ) -> Table:
+        """Create a table
+
+        Args:
+            identifier (str | Identifier): Table identifier.
+            schema (Schema): Table's schema.
+            location (str | None): Location for the table. Optional Argument.
+            partition_spec (PartitionSpec): PartitionSpec for the table.
+            sort_order (SortOrder): SortOrder for the table.
+            properties (Properties | None): Table properties that can be a string based dictionary. Optional Argument.
+
+        Returns:
+            Table: the created table instance
+
+        Raises:
+            TableAlreadyExistsError: If a table with the name already exists
+        """
+
+    @abstractmethod
+    def load_table(self, identifier: str | Identifier) -> Table:
+        """Loads the table's metadata and returns the table instance.
+
+        You can also use this method to check for table existence using 'try catalog.load_table() except NoSuchTableError'
+        Note: This method doesn't scan data stored in the table.
+
+        Args:
+            identifier (str | Identifier): Table identifier.
+
+        Returns:
+            Table: the table instance with its metadata
+
+        Raises:
+            NoSuchTableError: If a table with the name does not exist
+        """
+
+    @abstractmethod
+    def drop_table(self, identifier: str | Identifier) -> None:
+        """Drop a table.
+
+        Args:
+            identifier (str | Identifier): Table identifier.
+
+        Raises:
+            NoSuchTableError: If a table with the name does not exist
+        """
+
+    @abstractmethod
+    def purge_table(self, identifier: str | Identifier) -> None:
+        """Drop a table and purge all data and metadata files.
+
+        Args:
+            identifier (str | Identifier): Table identifier.
+
+        Raises:
+            NoSuchTableError: If a table with the name does not exist
+        """
+
+    @abstractmethod
+    def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
+        """Rename a fully qualified table name
+
+        Args:
+            from_identifier (str | Identifier): Existing table identifier.
+            to_identifier (str | Identifier): New table identifier.
+
+        Returns:
+            Table: the updated table instance with its metadata
+
+        Raises:
+            NoSuchTableError: If a table with the name does not exist
+        """
+
+    @abstractmethod
+    def create_namespace(self, namespace: str | Identifier, properties: Properties | None = None) -> None:
+        """Create a namespace in the catalog.
+
+        Args:
+            namespace (str | Identifier): Namespace identifier
+            properties (Properties | None): A string dictionary of properties for the given namespace
+
+        Raises:
+            NamespaceAlreadyExistsError: If a namespace with the given name already exists
+        """
+
+    @abstractmethod
+    def drop_namespace(self, namespace: str | Identifier) -> None:
+        """Drop a namespace.
+
+        Args:
+            namespace (str | Identifier): Namespace identifier
+
+        Raises:
+            NoSuchNamespaceError: If a namespace with the given name does not exist
+            NamespaceNotEmptyError: If the namespace is not empty
+        """
+
+    @abstractmethod
+    def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
+        """List tables under the given namespace in the catalog.
+
+        If namespace not provided, will list all tables in the catalog.
+
+        Args:
+            namespace (str | Identifier): Namespace identifier to search.
+
+        Returns:
+            List[Identifier]: list of table identifiers.
+
+        Raises:
+            NoSuchNamespaceError: If a namespace with the given name does not exist
+        """
+
+    @abstractmethod
+    def list_namespaces(self) -> list[Identifier]:
+        """List top-level namespaces from the catalog.
+
+        Returns:
+            List[Identifier]: a List of namespace identifiers
+
+        Raises:
+            NoSuchNamespaceError: If a namespace with the given name does not exist
+        """
+
+    @abstractmethod
+    def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
+        """Get properties for a namespace.
+
+        Args:
+            namespace (str | Identifier): Namespace identifier
+
+        Returns:
+            Properties: Properties for the given namespace
+
+        Raises:
+            NoSuchNamespaceError: If a namespace with the given name does not exist
+        """
+
+    @abstractmethod
+    def update_namespace_properties(
+        self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties | None = None
+    ) -> PropertiesUpdateSummary:
+        """Removes provided property keys and updates properties for a namespace.
+
+        Args:
+            namespace (str | Identifier): Namespace identifier
+            removals (set[str] | None): Set of property keys that need to be removed. Optional Argument.
+            updates (Properties | None): Properties to be updated for the given namespace. Optional Argument.
+
+        Raises:
+            NoSuchNamespaceError: If a namespace with the given name does not exist
+            ValueError: If removals and updates have overlapping keys.
+        """
+
+    @staticmethod
+    def identifier_to_tuple(identifier: str | Identifier) -> Identifier:
+        """Parses an identifier to a tuple.
+
+        If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
+
+        Args:
+            identifier (str | Identifier): an identifier, either a string or tuple of strings
+
+        Returns:
+            Identifier: a tuple of strings
+        """
+        return identifier if isinstance(identifier, tuple) else tuple(str.split(identifier, "."))
+
+    @staticmethod
+    def table_name_from(identifier: str | Identifier) -> str:
+        """Extracts table name from a table identifier
+
+        Args:
+            identifier (str | Identifier): a table identifier
+
+        Returns:
+            str: Table name
+        """
+        return Catalog.identifier_to_tuple(identifier)[-1]
+
+    @staticmethod
+    def namespace_from(identifier: str | Identifier) -> Identifier:
+        """Extracts table namespace from a table identifier
+
+        Args:
+            identifier (str | Identifier): a table identifier
+
+        Returns:
+            Identifier: Namespace identifier
+        """
+        return Catalog.identifier_to_tuple(identifier)[:-1]
diff --git a/python/pyiceberg/catalog/base.py b/python/pyiceberg/catalog/base.py
deleted file mode 100644
index 49eca489bb..0000000000
--- a/python/pyiceberg/catalog/base.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing,
-#  software distributed under the License is distributed on an
-#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#  KIND, either express or implied.  See the License for the
-#  specific language governing permissions and limitations
-#  under the License.
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.schema import Schema
-from pyiceberg.table.base import Table
-from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
-from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
-
-
-@dataclass
-class PropertiesUpdateSummary:
-    removed: list[str]
-    updated: list[str]
-    missing: list[str]
-
-
-class Catalog(ABC):
-    """Base Catalog for table operations like - create, drop, load, list and others.
-
-    The catalog table APIs accept a table identifier, which is fully classified table name. The identifier can be a string or
-    tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
-
-    The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string
-    or tuple of strings.
-
-    Attributes:
-        name (str): Name of the catalog
-        properties (Properties): Catalog properties
-    """
-
-    def __init__(self, name: str, properties: Properties):
-        self._name = name
-        self._properties = properties
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def properties(self) -> Properties:
-        return self._properties
-
-    @abstractmethod
-    def create_table(
-        self,
-        identifier: str | Identifier,
-        schema: Schema,
-        location: str | None = None,
-        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
-        sort_order: SortOrder = UNSORTED_SORT_ORDER,
-        properties: Properties | None = None,
-    ) -> Table:
-        """Create a table
-
-        Args:
-            identifier (str | Identifier): Table identifier.
-            schema (Schema): Table's schema.
-            location (str): Location for the table. Optional Argument.
-            partition_spec (PartitionSpec): PartitionSpec for the table.
-            sort_order (SortOrder): SortOrder for the table.
-            properties (Properties | None): Table properties that can be a string based dictionary. Optional Argument.
-
-        Returns:
-            Table: the created table instance
-
-        Raises:
-            TableAlreadyExistsError: If a table with the name already exists
-        """
-
-    @abstractmethod
-    def load_table(self, identifier: str | Identifier) -> Table:
-        """Loads the table's metadata and returns the table instance.
-
-        You can also use this method to check for table existence using 'try catalog.table() except NoSuchTableError'
-        Note: This method doesn't scan data stored in the table.
-
-        Args:
-            identifier (str | Identifier): Table identifier.
-
-        Returns:
-            Table: the table instance with its metadata
-
-        Raises:
-            NoSuchTableError: If a table with the name does not exist
-        """
-
-    @abstractmethod
-    def drop_table(self, identifier: str | Identifier) -> None:
-        """Drop a table.
-
-        Args:
-            identifier (str | Identifier): Table identifier.
-
-        Raises:
-            NoSuchTableError: If a table with the name does not exist
-        """
-
-    @abstractmethod
-    def purge_table(self, identifier: str | Identifier) -> None:
-        """Drop a table and purge all data and metadata files.
-
-        Args:
-            identifier (str | Identifier): Table identifier.
-
-        Raises:
-            NoSuchTableError: If a table with the name does not exist
-        """
-
-    @abstractmethod
-    def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
-        """Rename a fully classified table name
-
-        Args:
-            from_identifier (str | Identifier): Existing table identifier.
-            to_identifier (str | Identifier): New table identifier.
-
-        Returns:
-            Table: the updated table instance with its metadata
-
-        Raises:
-            NoSuchTableError: If a table with the name does not exist
-        """
-
-    @abstractmethod
-    def create_namespace(self, namespace: str | Identifier, properties: Properties | None = None) -> None:
-        """Create a namespace in the catalog.
-
-        Args:
-            namespace (str | Identifier): Namespace identifier
-            properties (Properties | None): A string dictionary of properties for the given namespace
-
-        Raises:
-            NamespaceAlreadyExistsError: If a namespace with the given name already exists
-        """
-
-    @abstractmethod
-    def drop_namespace(self, namespace: str | Identifier) -> None:
-        """Drop a namespace.
-
-        Args:
-            namespace (str | Identifier): Namespace identifier
-
-        Raises:
-            NoSuchNamespaceError: If a namespace with the given name does not exist
-            NamespaceNotEmptyError: If the namespace is not empty
-        """
-
-    @abstractmethod
-    def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
-        """List tables under the given namespace in the catalog.
-
-        If namespace not provided, will list all tables in the catalog.
-
-        Args:
-            namespace (str | Identifier | None): Namespace identifier to search.
-
-        Returns:
-            List[Identifier]: list of table identifiers.
-
-        Raises:
-            NoSuchNamespaceError: If a namespace with the given name does not exist
-        """
-
-    @abstractmethod
-    def list_namespaces(self) -> list[Identifier]:
-        """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.
-
-        Returns:
-            List[Identifier]: a List of namespace identifiers
-
-        Raises:
-            NoSuchNamespaceError: If a namespace with the given name does not exist
-        """
-
-    @abstractmethod
-    def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
-        """Get properties for a namespace.
-
-        Args:
-            namespace (str | Identifier): Namespace identifier
-
-        Returns:
-            Properties: Properties for the given namespace
-
-        Raises:
-            NoSuchNamespaceError: If a namespace with the given name does not exist
-        """
-
-    @abstractmethod
-    def update_namespace_properties(
-        self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties | None = None
-    ) -> PropertiesUpdateSummary:
-        """Removes provided property keys and updates properties for a namespace.
-
-        Args:
-            namespace (str | Identifier): Namespace identifier
-            removals (Set[str]): Set of property keys that need to be removed. Optional Argument.
-            updates (Properties | None): Properties to be updated for the given namespace. Optional Argument.
-
-        Raises:
-            NoSuchNamespaceError: If a namespace with the given name does not exist
-            ValueError: If removals and updates have overlapping keys.
-        """
-
-    @staticmethod
-    def identifier_to_tuple(identifier: str | Identifier) -> Identifier:
-        """Parses an identifier to a tuple.
-
-        If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
-
-        Args:
-            identifier (str | Identifier: an identifier, either a string or tuple of strings
-
-        Returns:
-            Identifier: a tuple of strings
-        """
-        return identifier if isinstance(identifier, tuple) else tuple(str.split(identifier, "."))
-
-    @staticmethod
-    def table_name_from(identifier: str | Identifier) -> str:
-        """Extracts table name from a table identifier
-
-        Args:
-            identifier (str | Identifier: a table identifier
-
-        Returns:
-            str: Table name
-        """
-        return Catalog.identifier_to_tuple(identifier)[-1]
-
-    @staticmethod
-    def namespace_from(identifier: str | Identifier) -> Identifier:
-        """Extracts table namespace from a table identifier
-
-        Args:
-            identifier (str | Identifier: a table identifier
-
-        Returns:
-            Identifier: Namespace identifier
-        """
-        return Catalog.identifier_to_tuple(identifier)[:-1]
diff --git a/python/pyiceberg/catalog/hive.py b/python/pyiceberg/catalog/hive.py
index f04fc22338..991f800130 100644
--- a/python/pyiceberg/catalog/hive.py
+++ b/python/pyiceberg/catalog/hive.py
@@ -43,8 +43,12 @@ from hive_metastore.ttypes import Table as HiveTable
 from thrift.protocol import TBinaryProtocol
 from thrift.transport import TSocket, TTransport
 
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+    Catalog,
+    Identifier,
+    Properties,
+    PropertiesUpdateSummary,
+)
 from pyiceberg.exceptions import (
     NamespaceAlreadyExistsError,
     NamespaceNotEmptyError,
diff --git a/python/pyiceberg/catalog/rest.py b/python/pyiceberg/catalog/rest.py
index bd6ea36b34..073996931d 100644
--- a/python/pyiceberg/catalog/rest.py
+++ b/python/pyiceberg/catalog/rest.py
@@ -30,8 +30,12 @@ from pydantic import Field
 from requests import HTTPError
 
 from pyiceberg import __version__
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+    Catalog,
+    Identifier,
+    Properties,
+    PropertiesUpdateSummary,
+)
 from pyiceberg.exceptions import (
     AuthorizationExpiredError,
     BadCredentialsError,
diff --git a/python/pyiceberg/table/base.py b/python/pyiceberg/table/base.py
index 5ade9d7251..ad31e75cdd 100644
--- a/python/pyiceberg/table/base.py
+++ b/python/pyiceberg/table/base.py
@@ -19,8 +19,8 @@ from typing import Optional, Union
 
 from pydantic import Field
 
-from pyiceberg.catalog.base import Identifier
 from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
+from pyiceberg.typedef import Identifier
 from pyiceberg.utils.iceberg_base_model import IcebergBaseModel
 
 
diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/typedef.py
similarity index 100%
copy from python/pyiceberg/catalog/__init__.py
copy to python/pyiceberg/typedef.py
diff --git a/python/tests/catalog/test_base.py b/python/tests/catalog/test_base.py
index 3ca41da083..7b29aeb5da 100644
--- a/python/tests/catalog/test_base.py
+++ b/python/tests/catalog/test_base.py
@@ -25,8 +25,12 @@ from typing import (
 
 import pytest
 
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+    Catalog,
+    Identifier,
+    Properties,
+    PropertiesUpdateSummary,
+)
 from pyiceberg.exceptions import (
     NamespaceAlreadyExistsError,
     NamespaceNotEmptyError,
diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py
index 77e0de02e7..63443ee720 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -31,7 +31,7 @@ from hive_metastore.ttypes import (
 )
 from hive_metastore.ttypes import Table as HiveTable
 
-from pyiceberg.catalog.base import PropertiesUpdateSummary
+from pyiceberg.catalog import PropertiesUpdateSummary
 from pyiceberg.catalog.hive import HiveCatalog
 from pyiceberg.exceptions import (
     NamespaceAlreadyExistsError,
diff --git a/python/tests/catalog/test_rest.py b/python/tests/catalog/test_rest.py
index 11b1885d54..5829bd1448 100644
--- a/python/tests/catalog/test_rest.py
+++ b/python/tests/catalog/test_rest.py
@@ -20,7 +20,7 @@ from uuid import UUID
 import pytest
 from requests_mock import Mocker
 
-from pyiceberg.catalog.base import PropertiesUpdateSummary, Table
+from pyiceberg.catalog import PropertiesUpdateSummary, Table
 from pyiceberg.catalog.rest import RestCatalog
 from pyiceberg.exceptions import (
     BadCredentialsError,