You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/10/12 14:51:34 UTC

[iceberg] branch master updated: Core: Fail if both Catalog type and catalog-impl are configured (#3162)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new c5eeb97  Core: Fail if both Catalog type and catalog-impl are configured (#3162)
c5eeb97 is described below

commit c5eeb97085f22927d87d1ef1d4791256c0eb7ad5
Author: Omar Al-Safi <om...@gmail.com>
AuthorDate: Tue Oct 12 16:51:07 2021 +0200

    Core: Fail if both Catalog type and catalog-impl are configured (#3162)
---
 core/src/main/java/org/apache/iceberg/CatalogUtil.java     |  5 +++++
 core/src/test/java/org/apache/iceberg/TestCatalogUtil.java | 12 ++++++++++++
 mr/src/main/java/org/apache/iceberg/mr/Catalogs.java       | 10 ++++++----
 mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java   | 11 +++++++++++
 site/docs/hive.md                                          |  4 ++--
 site/docs/spark-configuration.md                           |  7 +++----
 6 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 4e477b8..bada6fd 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -220,6 +220,11 @@ public class CatalogUtil {
         default:
           throw new UnsupportedOperationException("Unknown catalog type: " + catalogType);
       }
+    } else {
+      String catalogType = options.get(ICEBERG_CATALOG_TYPE);
+      Preconditions.checkArgument(catalogType == null,
+          "Cannot create catalog %s, both type and catalog-impl are set: type=%s, catalog-impl=%s",
+          name, catalogType, catalogImpl);
     }
 
     return CatalogUtil.loadCatalog(catalogImpl, name, options, conf);
diff --git a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
index a008f9b..eb393cf 100644
--- a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
@@ -159,6 +159,18 @@ public class TestCatalogUtil {
         () -> CatalogUtil.loadFileIO(TestFileIONotImpl.class.getName(), Maps.newHashMap(), null));
   }
 
+  @Test
+  public void buildCustomCatalog_withTypeSet() {
+    Map<String, String> options = new HashMap<>();
+    options.put(CatalogProperties.CATALOG_IMPL, "CustomCatalog");
+    options.put(CatalogUtil.ICEBERG_CATALOG_TYPE, "hive");
+    Configuration hadoopConf = new Configuration();
+    String name = "custom";
+
+    AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+        "both type and catalog-impl are set", () -> CatalogUtil.buildIcebergCatalog(name, options, hadoopConf));
+  }
+
   public static class TestCatalog extends BaseMetastoreCatalog {
 
     private String catalogName;
diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
index 2e54cf5..e02a070 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
@@ -202,7 +202,11 @@ public final class Catalogs {
    */
   public static boolean hiveCatalog(Configuration conf, Properties props) {
     String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
-    return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, catalogName));
+    String catalogType = getCatalogType(conf, catalogName);
+    if (catalogType != null) {
+      return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(catalogType);
+    }
+    return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, ICEBERG_DEFAULT_CATALOG_NAME));
   }
 
   @VisibleForTesting
@@ -279,9 +283,7 @@ public final class Catalogs {
       }
     } else {
       String catalogType = conf.get(InputFormatConfig.CATALOG);
-      if (catalogType == null) {
-        return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE;
-      } else if (catalogType.equals(LOCATION)) {
+      if (catalogType != null && catalogType.equals(LOCATION)) {
         return NO_CATALOG_TYPE;
       } else {
         return catalogType;
diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
index 0d5b1d6..6ac9808 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
@@ -257,6 +257,17 @@ public class TestCatalogs {
   }
 
   @Test
+  public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() {
+    String catalogName = "barCatalog";
+    conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
+            CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE);
+    conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName());
+    conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation");
+    AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+            "both type and catalog-impl are set", () -> Catalogs.loadCatalog(conf, catalogName));
+  }
+
+  @Test
   public void testLoadCatalogHadoop() {
     String catalogName = "barCatalog";
     conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
diff --git a/site/docs/hive.md b/site/docs/hive.md
index ea4b90a..b05f389 100644
--- a/site/docs/hive.md
+++ b/site/docs/hive.md
@@ -98,8 +98,8 @@ To globally register different catalogs, set the following Hadoop configurations
 
 | Config Key                                    | Description                                            |
 | --------------------------------------------- | ------------------------------------------------------ |
-| iceberg.catalog.<catalog_name\>.type          | type of catalog: `hive` or `hadoop`                    |
-| iceberg.catalog.<catalog_name\>.catalog-impl  | catalog implementation, must not be null if type is null |
+| iceberg.catalog.<catalog_name\>.type          | type of catalog: `hive`, `hadoop`, or left unset if using a custom catalog  |
+| iceberg.catalog.<catalog_name\>.catalog-impl  | catalog implementation, must not be null if type is empty |
 | iceberg.catalog.<catalog_name\>.<key\>        | any config key and value pairs for the catalog         |
 
 Here are some examples using Hive CLI:
diff --git a/site/docs/spark-configuration.md b/site/docs/spark-configuration.md
index 93a8759..67befea 100644
--- a/site/docs/spark-configuration.md
+++ b/site/docs/spark-configuration.md
@@ -54,8 +54,8 @@ Both catalogs are configured using properties nested under the catalog name. Com
 
 | Property                                           | Values                        | Description                                                          |
 | -------------------------------------------------- | ----------------------------- | -------------------------------------------------------------------- |
-| spark.sql.catalog._catalog-name_.type              | `hive` or `hadoop`            | The underlying Iceberg catalog implementation, `HiveCatalog` or `HadoopCatalog` |
-| spark.sql.catalog._catalog-name_.catalog-impl      |                               | The underlying Iceberg catalog implementation. When set, the value of `type` property is ignored |
+| spark.sql.catalog._catalog-name_.type              | `hive` or `hadoop`            | The underlying Iceberg catalog implementation, `HiveCatalog`, `HadoopCatalog` or left unset if using a custom catalog |
+| spark.sql.catalog._catalog-name_.catalog-impl      |                               | The underlying Iceberg catalog implementation.|
 | spark.sql.catalog._catalog-name_.default-namespace | default                       | The default current namespace for the catalog |
 | spark.sql.catalog._catalog-name_.uri               | thrift://host:port            | Metastore connect URI; default from `hive-site.xml` |
 | spark.sql.catalog._catalog-name_.warehouse         | hdfs://nn:8020/warehouse/path | Base path for the warehouse directory |
@@ -104,8 +104,7 @@ spark.sql.catalog.hadoop_prod.hadoop.fs.s3a.endpoint = http://aws-local:9000
 
 ### Loading a custom catalog
 
-Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property.
-When `catalog-impl` is set, the value of `type` is ignored. Here is an example:
+Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property. Here is an example:
 
 ```plain
 spark.sql.catalog.custom_prod = org.apache.iceberg.spark.SparkCatalog