You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/08/07 20:24:33 UTC
[iceberg] branch master updated: Python: Move catalog package base.py classes to __init__.py (#5457)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 102f51ffab Python: Move catalog package base.py classes to __init__.py (#5457)
102f51ffab is described below
commit 102f51ffab3a51be77ecc9bafa68d55efb1136e4
Author: Samuel Redai <43...@users.noreply.github.com>
AuthorDate: Sun Aug 7 16:24:29 2022 -0400
Python: Move catalog package base.py classes to __init__.py (#5457)
---
python/pyiceberg/catalog/__init__.py | 247 ++++++++++++++++++-
python/pyiceberg/catalog/base.py | 261 ---------------------
python/pyiceberg/catalog/hive.py | 8 +-
python/pyiceberg/catalog/rest.py | 8 +-
python/pyiceberg/table/base.py | 2 +-
.../pyiceberg/{catalog/__init__.py => typedef.py} | 0
python/tests/catalog/test_base.py | 8 +-
python/tests/catalog/test_hive.py | 2 +-
python/tests/catalog/test_rest.py | 2 +-
9 files changed, 265 insertions(+), 273 deletions(-)
diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/catalog/__init__.py
index d78cfc86a8..25063189a9 100644
--- a/python/pyiceberg/catalog/__init__.py
+++ b/python/pyiceberg/catalog/__init__.py
@@ -14,7 +14,248 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from typing import Dict, Tuple
-Identifier = Tuple[str, ...]
-Properties = Dict[str, str]
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+from pyiceberg.schema import Schema
+from pyiceberg.table.base import Table
+from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
+from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
+from pyiceberg.typedef import Identifier, Properties
+
+
+@dataclass
+class PropertiesUpdateSummary:
+ removed: list[str]
+ updated: list[str]
+ missing: list[str]
+
+
+class Catalog(ABC):
+ """Base Catalog for table operations like - create, drop, load, list and others.
+
+ The catalog table APIs accept a table identifier, which is a fully qualified table name. The identifier can be a string or
+ tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
+
+ The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string
+ or tuple of strings.
+
+ Attributes:
+ name (str): Name of the catalog
+ properties (Properties): Catalog properties
+ """
+
+ def __init__(self, name: str, properties: Properties):
+ self._name = name
+ self._properties = properties
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def properties(self) -> Properties:
+ return self._properties
+
+ @abstractmethod
+ def create_table(
+ self,
+ identifier: str | Identifier,
+ schema: Schema,
+ location: str | None = None,
+ partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
+ sort_order: SortOrder = UNSORTED_SORT_ORDER,
+ properties: Properties | None = None,
+ ) -> Table:
+ """Create a table
+
+ Args:
+ identifier (str | Identifier): Table identifier.
+ schema (Schema): Table's schema.
+ location (str): Location for the table. Optional Argument.
+ partition_spec (PartitionSpec): PartitionSpec for the table.
+ sort_order (SortOrder): SortOrder for the table.
+ properties (Properties | None): Table properties that can be a string based dictionary. Optional Argument.
+
+ Returns:
+ Table: the created table instance
+
+ Raises:
+ TableAlreadyExistsError: If a table with the name already exists
+ """
+
+ @abstractmethod
+ def load_table(self, identifier: str | Identifier) -> Table:
+ """Loads the table's metadata and returns the table instance.
+
+ You can also use this method to check for table existence using 'try catalog.load_table() except NoSuchTableError'
+ Note: This method doesn't scan data stored in the table.
+
+ Args:
+ identifier (str | Identifier): Table identifier.
+
+ Returns:
+ Table: the table instance with its metadata
+
+ Raises:
+ NoSuchTableError: If a table with the name does not exist
+ """
+
+ @abstractmethod
+ def drop_table(self, identifier: str | Identifier) -> None:
+ """Drop a table.
+
+ Args:
+ identifier (str | Identifier): Table identifier.
+
+ Raises:
+ NoSuchTableError: If a table with the name does not exist
+ """
+
+ @abstractmethod
+ def purge_table(self, identifier: str | Identifier) -> None:
+ """Drop a table and purge all data and metadata files.
+
+ Args:
+ identifier (str | Identifier): Table identifier.
+
+ Raises:
+ NoSuchTableError: If a table with the name does not exist
+ """
+
+ @abstractmethod
+ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
+ Rename a fully qualified table name
+
+ Args:
+ from_identifier (str | Identifier): Existing table identifier.
+ to_identifier (str | Identifier): New table identifier.
+
+ Returns:
+ Table: the updated table instance with its metadata
+
+ Raises:
+ NoSuchTableError: If a table with the name does not exist
+ """
+
+ @abstractmethod
+ def create_namespace(self, namespace: str | Identifier, properties: Properties | None = None) -> None:
+ """Create a namespace in the catalog.
+
+ Args:
+ namespace (str | Identifier): Namespace identifier
+ properties (Properties | None): A string dictionary of properties for the given namespace
+
+ Raises:
+ NamespaceAlreadyExistsError: If a namespace with the given name already exists
+ """
+
+ @abstractmethod
+ def drop_namespace(self, namespace: str | Identifier) -> None:
+ """Drop a namespace.
+
+ Args:
+ namespace (str | Identifier): Namespace identifier
+
+ Raises:
+ NoSuchNamespaceError: If a namespace with the given name does not exist
+ NamespaceNotEmptyError: If the namespace is not empty
+ """
+
+ @abstractmethod
+ def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
+ """List tables under the given namespace in the catalog.
+
+ If namespace not provided, will list all tables in the catalog.
+
+ Args:
+ namespace (str | Identifier | None): Namespace identifier to search.
+
+ Returns:
+ List[Identifier]: list of table identifiers.
+
+ Raises:
+ NoSuchNamespaceError: If a namespace with the given name does not exist
+ """
+
+ @abstractmethod
+ def list_namespaces(self) -> list[Identifier]:
+ """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.
+
+ Returns:
+ List[Identifier]: a List of namespace identifiers
+
+ Raises:
+ NoSuchNamespaceError: If a namespace with the given name does not exist
+ """
+
+ @abstractmethod
+ def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
+ """Get properties for a namespace.
+
+ Args:
+ namespace (str | Identifier): Namespace identifier
+
+ Returns:
+ Properties: Properties for the given namespace
+
+ Raises:
+ NoSuchNamespaceError: If a namespace with the given name does not exist
+ """
+
+ @abstractmethod
+ def update_namespace_properties(
+ self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties | None = None
+ ) -> PropertiesUpdateSummary:
+ """Removes provided property keys and updates properties for a namespace.
+
+ Args:
+ namespace (str | Identifier): Namespace identifier
+ removals (set[str] | None): Set of property keys that need to be removed. Optional Argument.
+ updates (Properties | None): Properties to be updated for the given namespace. Optional Argument.
+
+ Raises:
+ NoSuchNamespaceError: If a namespace with the given name does not exist
+ ValueError: If removals and updates have overlapping keys.
+ """
+
+ @staticmethod
+ def identifier_to_tuple(identifier: str | Identifier) -> Identifier:
+ """Parses an identifier to a tuple.
+
+ If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
+
+ Args:
+ identifier (str | Identifier): an identifier, either a string or tuple of strings
+
+ Returns:
+ Identifier: a tuple of strings
+ """
+ return identifier if isinstance(identifier, tuple) else tuple(str.split(identifier, "."))
+
+ @staticmethod
+ def table_name_from(identifier: str | Identifier) -> str:
+ """Extracts table name from a table identifier
+
+ Args:
+ identifier (str | Identifier): a table identifier
+
+ Returns:
+ str: Table name
+ """
+ return Catalog.identifier_to_tuple(identifier)[-1]
+
+ @staticmethod
+ def namespace_from(identifier: str | Identifier) -> Identifier:
+ """Extracts table namespace from a table identifier
+
+ Args:
+ identifier (str | Identifier): a table identifier
+
+ Returns:
+ Identifier: Namespace identifier
+ """
+ return Catalog.identifier_to_tuple(identifier)[:-1]
diff --git a/python/pyiceberg/catalog/base.py b/python/pyiceberg/catalog/base.py
deleted file mode 100644
index 49eca489bb..0000000000
--- a/python/pyiceberg/catalog/base.py
+++ /dev/null
@@ -1,261 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.schema import Schema
-from pyiceberg.table.base import Table
-from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
-from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
-
-
-@dataclass
-class PropertiesUpdateSummary:
- removed: list[str]
- updated: list[str]
- missing: list[str]
-
-
-class Catalog(ABC):
- """Base Catalog for table operations like - create, drop, load, list and others.
-
- The catalog table APIs accept a table identifier, which is fully classified table name. The identifier can be a string or
- tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
-
- The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string
- or tuple of strings.
-
- Attributes:
- name (str): Name of the catalog
- properties (Properties): Catalog properties
- """
-
- def __init__(self, name: str, properties: Properties):
- self._name = name
- self._properties = properties
-
- @property
- def name(self) -> str:
- return self._name
-
- @property
- def properties(self) -> Properties:
- return self._properties
-
- @abstractmethod
- def create_table(
- self,
- identifier: str | Identifier,
- schema: Schema,
- location: str | None = None,
- partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
- sort_order: SortOrder = UNSORTED_SORT_ORDER,
- properties: Properties | None = None,
- ) -> Table:
- """Create a table
-
- Args:
- identifier (str | Identifier): Table identifier.
- schema (Schema): Table's schema.
- location (str): Location for the table. Optional Argument.
- partition_spec (PartitionSpec): PartitionSpec for the table.
- sort_order (SortOrder): SortOrder for the table.
- properties (Properties | None): Table properties that can be a string based dictionary. Optional Argument.
-
- Returns:
- Table: the created table instance
-
- Raises:
- TableAlreadyExistsError: If a table with the name already exists
- """
-
- @abstractmethod
- def load_table(self, identifier: str | Identifier) -> Table:
- """Loads the table's metadata and returns the table instance.
-
- You can also use this method to check for table existence using 'try catalog.table() except NoSuchTableError'
- Note: This method doesn't scan data stored in the table.
-
- Args:
- identifier (str | Identifier): Table identifier.
-
- Returns:
- Table: the table instance with its metadata
-
- Raises:
- NoSuchTableError: If a table with the name does not exist
- """
-
- @abstractmethod
- def drop_table(self, identifier: str | Identifier) -> None:
- """Drop a table.
-
- Args:
- identifier (str | Identifier): Table identifier.
-
- Raises:
- NoSuchTableError: If a table with the name does not exist
- """
-
- @abstractmethod
- def purge_table(self, identifier: str | Identifier) -> None:
- """Drop a table and purge all data and metadata files.
-
- Args:
- identifier (str | Identifier): Table identifier.
-
- Raises:
- NoSuchTableError: If a table with the name does not exist
- """
-
- @abstractmethod
- def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
- """Rename a fully classified table name
-
- Args:
- from_identifier (str | Identifier): Existing table identifier.
- to_identifier (str | Identifier): New table identifier.
-
- Returns:
- Table: the updated table instance with its metadata
-
- Raises:
- NoSuchTableError: If a table with the name does not exist
- """
-
- @abstractmethod
- def create_namespace(self, namespace: str | Identifier, properties: Properties | None = None) -> None:
- """Create a namespace in the catalog.
-
- Args:
- namespace (str | Identifier): Namespace identifier
- properties (Properties | None): A string dictionary of properties for the given namespace
-
- Raises:
- NamespaceAlreadyExistsError: If a namespace with the given name already exists
- """
-
- @abstractmethod
- def drop_namespace(self, namespace: str | Identifier) -> None:
- """Drop a namespace.
-
- Args:
- namespace (str | Identifier): Namespace identifier
-
- Raises:
- NoSuchNamespaceError: If a namespace with the given name does not exist
- NamespaceNotEmptyError: If the namespace is not empty
- """
-
- @abstractmethod
- def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
- """List tables under the given namespace in the catalog.
-
- If namespace not provided, will list all tables in the catalog.
-
- Args:
- namespace (str | Identifier | None): Namespace identifier to search.
-
- Returns:
- List[Identifier]: list of table identifiers.
-
- Raises:
- NoSuchNamespaceError: If a namespace with the given name does not exist
- """
-
- @abstractmethod
- def list_namespaces(self) -> list[Identifier]:
- """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.
-
- Returns:
- List[Identifier]: a List of namespace identifiers
-
- Raises:
- NoSuchNamespaceError: If a namespace with the given name does not exist
- """
-
- @abstractmethod
- def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
- """Get properties for a namespace.
-
- Args:
- namespace (str | Identifier): Namespace identifier
-
- Returns:
- Properties: Properties for the given namespace
-
- Raises:
- NoSuchNamespaceError: If a namespace with the given name does not exist
- """
-
- @abstractmethod
- def update_namespace_properties(
- self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties | None = None
- ) -> PropertiesUpdateSummary:
- """Removes provided property keys and updates properties for a namespace.
-
- Args:
- namespace (str | Identifier): Namespace identifier
- removals (Set[str]): Set of property keys that need to be removed. Optional Argument.
- updates (Properties | None): Properties to be updated for the given namespace. Optional Argument.
-
- Raises:
- NoSuchNamespaceError: If a namespace with the given name does not exist
- ValueError: If removals and updates have overlapping keys.
- """
-
- @staticmethod
- def identifier_to_tuple(identifier: str | Identifier) -> Identifier:
- """Parses an identifier to a tuple.
-
- If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.
-
- Args:
- identifier (str | Identifier: an identifier, either a string or tuple of strings
-
- Returns:
- Identifier: a tuple of strings
- """
- return identifier if isinstance(identifier, tuple) else tuple(str.split(identifier, "."))
-
- @staticmethod
- def table_name_from(identifier: str | Identifier) -> str:
- """Extracts table name from a table identifier
-
- Args:
- identifier (str | Identifier: a table identifier
-
- Returns:
- str: Table name
- """
- return Catalog.identifier_to_tuple(identifier)[-1]
-
- @staticmethod
- def namespace_from(identifier: str | Identifier) -> Identifier:
- """Extracts table namespace from a table identifier
-
- Args:
- identifier (str | Identifier: a table identifier
-
- Returns:
- Identifier: Namespace identifier
- """
- return Catalog.identifier_to_tuple(identifier)[:-1]
diff --git a/python/pyiceberg/catalog/hive.py b/python/pyiceberg/catalog/hive.py
index f04fc22338..991f800130 100644
--- a/python/pyiceberg/catalog/hive.py
+++ b/python/pyiceberg/catalog/hive.py
@@ -43,8 +43,12 @@ from hive_metastore.ttypes import Table as HiveTable
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket, TTransport
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+ Catalog,
+ Identifier,
+ Properties,
+ PropertiesUpdateSummary,
+)
from pyiceberg.exceptions import (
NamespaceAlreadyExistsError,
NamespaceNotEmptyError,
diff --git a/python/pyiceberg/catalog/rest.py b/python/pyiceberg/catalog/rest.py
index bd6ea36b34..073996931d 100644
--- a/python/pyiceberg/catalog/rest.py
+++ b/python/pyiceberg/catalog/rest.py
@@ -30,8 +30,12 @@ from pydantic import Field
from requests import HTTPError
from pyiceberg import __version__
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+ Catalog,
+ Identifier,
+ Properties,
+ PropertiesUpdateSummary,
+)
from pyiceberg.exceptions import (
AuthorizationExpiredError,
BadCredentialsError,
diff --git a/python/pyiceberg/table/base.py b/python/pyiceberg/table/base.py
index 5ade9d7251..ad31e75cdd 100644
--- a/python/pyiceberg/table/base.py
+++ b/python/pyiceberg/table/base.py
@@ -19,8 +19,8 @@ from typing import Optional, Union
from pydantic import Field
-from pyiceberg.catalog.base import Identifier
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
+from pyiceberg.typedef import Identifier
from pyiceberg.utils.iceberg_base_model import IcebergBaseModel
diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/typedef.py
similarity index 100%
copy from python/pyiceberg/catalog/__init__.py
copy to python/pyiceberg/typedef.py
diff --git a/python/tests/catalog/test_base.py b/python/tests/catalog/test_base.py
index 3ca41da083..7b29aeb5da 100644
--- a/python/tests/catalog/test_base.py
+++ b/python/tests/catalog/test_base.py
@@ -25,8 +25,12 @@ from typing import (
import pytest
-from pyiceberg.catalog import Identifier, Properties
-from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary
+from pyiceberg.catalog import (
+ Catalog,
+ Identifier,
+ Properties,
+ PropertiesUpdateSummary,
+)
from pyiceberg.exceptions import (
NamespaceAlreadyExistsError,
NamespaceNotEmptyError,
diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py
index 77e0de02e7..63443ee720 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -31,7 +31,7 @@ from hive_metastore.ttypes import (
)
from hive_metastore.ttypes import Table as HiveTable
-from pyiceberg.catalog.base import PropertiesUpdateSummary
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.hive import HiveCatalog
from pyiceberg.exceptions import (
NamespaceAlreadyExistsError,
diff --git a/python/tests/catalog/test_rest.py b/python/tests/catalog/test_rest.py
index 11b1885d54..5829bd1448 100644
--- a/python/tests/catalog/test_rest.py
+++ b/python/tests/catalog/test_rest.py
@@ -20,7 +20,7 @@ from uuid import UUID
import pytest
from requests_mock import Mocker
-from pyiceberg.catalog.base import PropertiesUpdateSummary, Table
+from pyiceberg.catalog import PropertiesUpdateSummary, Table
from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.exceptions import (
BadCredentialsError,