You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/10/12 14:51:34 UTC
[iceberg] branch master updated: Core: Fail if both Catalog type
and catalog-impl are configured (#3162)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new c5eeb97 Core: Fail if both Catalog type and catalog-impl are configured (#3162)
c5eeb97 is described below
commit c5eeb97085f22927d87d1ef1d4791256c0eb7ad5
Author: Omar Al-Safi <om...@gmail.com>
AuthorDate: Tue Oct 12 16:51:07 2021 +0200
Core: Fail if both Catalog type and catalog-impl are configured (#3162)
---
core/src/main/java/org/apache/iceberg/CatalogUtil.java | 5 +++++
core/src/test/java/org/apache/iceberg/TestCatalogUtil.java | 12 ++++++++++++
mr/src/main/java/org/apache/iceberg/mr/Catalogs.java | 10 ++++++----
mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java | 11 +++++++++++
site/docs/hive.md | 4 ++--
site/docs/spark-configuration.md | 7 +++----
6 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 4e477b8..bada6fd 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -220,6 +220,11 @@ public class CatalogUtil {
default:
throw new UnsupportedOperationException("Unknown catalog type: " + catalogType);
}
+ } else {
+ String catalogType = options.get(ICEBERG_CATALOG_TYPE);
+ Preconditions.checkArgument(catalogType == null,
+ "Cannot create catalog %s, both type and catalog-impl are set: type=%s, catalog-impl=%s",
+ name, catalogType, catalogImpl);
}
return CatalogUtil.loadCatalog(catalogImpl, name, options, conf);
diff --git a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
index a008f9b..eb393cf 100644
--- a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
@@ -159,6 +159,18 @@ public class TestCatalogUtil {
() -> CatalogUtil.loadFileIO(TestFileIONotImpl.class.getName(), Maps.newHashMap(), null));
}
+ @Test
+ public void buildCustomCatalog_withTypeSet() {
+ Map<String, String> options = new HashMap<>();
+ options.put(CatalogProperties.CATALOG_IMPL, "CustomCatalog");
+ options.put(CatalogUtil.ICEBERG_CATALOG_TYPE, "hive");
+ Configuration hadoopConf = new Configuration();
+ String name = "custom";
+
+ AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+ "both type and catalog-impl are set", () -> CatalogUtil.buildIcebergCatalog(name, options, hadoopConf));
+ }
+
public static class TestCatalog extends BaseMetastoreCatalog {
private String catalogName;
diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
index 2e54cf5..e02a070 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
@@ -202,7 +202,11 @@ public final class Catalogs {
*/
public static boolean hiveCatalog(Configuration conf, Properties props) {
String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
- return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, catalogName));
+ String catalogType = getCatalogType(conf, catalogName);
+ if (catalogType != null) {
+ return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(catalogType);
+ }
+ return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, ICEBERG_DEFAULT_CATALOG_NAME));
}
@VisibleForTesting
@@ -279,9 +283,7 @@ public final class Catalogs {
}
} else {
String catalogType = conf.get(InputFormatConfig.CATALOG);
- if (catalogType == null) {
- return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE;
- } else if (catalogType.equals(LOCATION)) {
+ if (catalogType != null && catalogType.equals(LOCATION)) {
return NO_CATALOG_TYPE;
} else {
return catalogType;
diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
index 0d5b1d6..6ac9808 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
@@ -257,6 +257,17 @@ public class TestCatalogs {
}
@Test
+ public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() {
+ String catalogName = "barCatalog";
+ conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
+ CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE);
+ conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName());
+ conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation");
+ AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+ "both type and catalog-impl are set", () -> Catalogs.loadCatalog(conf, catalogName));
+ }
+
+ @Test
public void testLoadCatalogHadoop() {
String catalogName = "barCatalog";
conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
diff --git a/site/docs/hive.md b/site/docs/hive.md
index ea4b90a..b05f389 100644
--- a/site/docs/hive.md
+++ b/site/docs/hive.md
@@ -98,8 +98,8 @@ To globally register different catalogs, set the following Hadoop configurations
| Config Key | Description |
| --------------------------------------------- | ------------------------------------------------------ |
-| iceberg.catalog.<catalog_name\>.type | type of catalog: `hive` or `hadoop` |
-| iceberg.catalog.<catalog_name\>.catalog-impl | catalog implementation, must not be null if type is null |
+| iceberg.catalog.<catalog_name\>.type | type of catalog: `hive`, `hadoop`, or left unset if using a custom catalog |
+| iceberg.catalog.<catalog_name\>.catalog-impl | catalog implementation, must not be null if type is empty |
| iceberg.catalog.<catalog_name\>.<key\> | any config key and value pairs for the catalog |
Here are some examples using Hive CLI:
diff --git a/site/docs/spark-configuration.md b/site/docs/spark-configuration.md
index 93a8759..67befea 100644
--- a/site/docs/spark-configuration.md
+++ b/site/docs/spark-configuration.md
@@ -54,8 +54,8 @@ Both catalogs are configured using properties nested under the catalog name. Com
| Property | Values | Description |
| -------------------------------------------------- | ----------------------------- | -------------------------------------------------------------------- |
-| spark.sql.catalog._catalog-name_.type | `hive` or `hadoop` | The underlying Iceberg catalog implementation, `HiveCatalog` or `HadoopCatalog` |
-| spark.sql.catalog._catalog-name_.catalog-impl | | The underlying Iceberg catalog implementation. When set, the value of `type` property is ignored |
+| spark.sql.catalog._catalog-name_.type | `hive` or `hadoop` | The underlying Iceberg catalog implementation, `HiveCatalog`, `HadoopCatalog` or left unset if using a custom catalog |
+| spark.sql.catalog._catalog-name_.catalog-impl | | The underlying Iceberg catalog implementation.|
| spark.sql.catalog._catalog-name_.default-namespace | default | The default current namespace for the catalog |
| spark.sql.catalog._catalog-name_.uri | thrift://host:port | Metastore connect URI; default from `hive-site.xml` |
| spark.sql.catalog._catalog-name_.warehouse | hdfs://nn:8020/warehouse/path | Base path for the warehouse directory |
@@ -104,8 +104,7 @@ spark.sql.catalog.hadoop_prod.hadoop.fs.s3a.endpoint = http://aws-local:9000
### Loading a custom catalog
-Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property.
-When `catalog-impl` is set, the value of `type` is ignored. Here is an example:
+Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property. Here is an example:
```plain
spark.sql.catalog.custom_prod = org.apache.iceberg.spark.SparkCatalog