You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ja...@apache.org on 2021/12/14 23:37:37 UTC
[iceberg] branch master updated: Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)
This is an automated email from the ASF dual-hosted git repository.
jackye pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 2e8efde Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)
2e8efde is described below
commit 2e8efde76171d0b56c6d0b209a997e4c176e7fc2
Author: Kyle Bendickson <kj...@gmail.com>
AuthorDate: Tue Dec 14 15:37:28 2021 -0800
Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)
---
.../apache/iceberg/spark/SparkCatalogConfig.java | 61 +++++++++++++++
.../apache/iceberg/spark/SparkCatalogTestBase.java | 88 +++++-----------------
...TestBase.java => SparkTestBaseWithCatalog.java} | 40 +++-------
3 files changed, 91 insertions(+), 98 deletions(-)
diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java
new file mode 100644
index 0000000..5d5dfeb
--- /dev/null
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.spark;
+
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+public enum SparkCatalogConfig {
+ HIVE("testhive", SparkCatalog.class.getName(), ImmutableMap.of(
+ "type", "hive",
+ "default-namespace", "default"
+ )),
+ HADOOP("testhadoop", SparkCatalog.class.getName(), ImmutableMap.of(
+ "type", "hadoop"
+ )),
+ SPARK("spark_catalog", SparkSessionCatalog.class.getName(), ImmutableMap.of(
+ "type", "hive",
+ "default-namespace", "default",
+ "parquet-enabled", "true",
+ "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
+ ));
+
+ private final String catalogName;
+ private final String implementation;
+ private final Map<String, String> properties;
+
+ SparkCatalogConfig(String catalogName, String implementation, Map<String, String> properties) {
+ this.catalogName = catalogName;
+ this.implementation = implementation;
+ this.properties = properties;
+ }
+
+ public String catalogName() {
+ return catalogName;
+ }
+
+ public String implementation() {
+ return implementation;
+ }
+
+ public Map<String, String> properties() {
+ return properties;
+ }
+}
diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
index 106ba12..774e813 100644
--- a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
@@ -19,91 +19,41 @@
package org.apache.iceberg.spark;
-import java.io.File;
-import java.io.IOException;
import java.util.Map;
-import org.apache.iceberg.catalog.Catalog;
-import org.apache.iceberg.catalog.Namespace;
-import org.apache.iceberg.catalog.SupportsNamespaces;
-import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.hadoop.HadoopCatalog;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
-public abstract class SparkCatalogTestBase extends SparkTestBase {
- private static File warehouse = null;
-
- @BeforeClass
- public static void createWarehouse() throws IOException {
- SparkCatalogTestBase.warehouse = File.createTempFile("warehouse", null);
- Assert.assertTrue(warehouse.delete());
- }
-
- @AfterClass
- public static void dropWarehouse() {
- if (warehouse != null && warehouse.exists()) {
- warehouse.delete();
- }
- }
+public abstract class SparkCatalogTestBase extends SparkTestBaseWithCatalog {
+ // these parameters are broken out to avoid changes that need to modify lots of test suites
@Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
public static Object[][] parameters() {
- return new Object[][] {
- { "testhive", SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"
- ) },
- { "testhadoop", SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hadoop"
- ) },
- { "spark_catalog", SparkSessionCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default",
- "parquet-enabled", "true",
- "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
- ) }
- };
+ return new Object[][] {{
+ SparkCatalogConfig.HIVE.catalogName(),
+ SparkCatalogConfig.HIVE.implementation(),
+ SparkCatalogConfig.HIVE.properties()
+ }, {
+ SparkCatalogConfig.HADOOP.catalogName(),
+ SparkCatalogConfig.HADOOP.implementation(),
+ SparkCatalogConfig.HADOOP.properties()
+ }, {
+ SparkCatalogConfig.SPARK.catalogName(),
+ SparkCatalogConfig.SPARK.implementation(),
+ SparkCatalogConfig.SPARK.properties()
+ }};
}
@Rule
public TemporaryFolder temp = new TemporaryFolder();
- protected final String catalogName;
- protected final Catalog validationCatalog;
- protected final SupportsNamespaces validationNamespaceCatalog;
- protected final TableIdentifier tableIdent = TableIdentifier.of(Namespace.of("default"), "table");
- protected final String tableName;
-
- public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
- this.catalogName = catalogName;
- this.validationCatalog = catalogName.equals("testhadoop") ?
- new HadoopCatalog(spark.sessionState().newHadoopConf(), "file:" + warehouse) :
- catalog;
- this.validationNamespaceCatalog = (SupportsNamespaces) validationCatalog;
-
- spark.conf().set("spark.sql.catalog." + catalogName, implementation);
- config.forEach((key, value) -> spark.conf().set("spark.sql.catalog." + catalogName + "." + key, value));
-
- if (config.get("type").equalsIgnoreCase("hadoop")) {
- spark.conf().set("spark.sql.catalog." + catalogName + ".warehouse", "file:" + warehouse);
- }
-
- this.tableName = (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default.table";
-
- sql("CREATE NAMESPACE IF NOT EXISTS default");
+ public SparkCatalogTestBase(SparkCatalogConfig config) {
+ super(config);
}
- protected String tableName(String name) {
- return (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default." + name;
+ public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
+ super(catalogName, implementation, config);
}
}
diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
similarity index 70%
copy from spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
copy to spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
index 106ba12..00dcd95 100644
--- a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
@@ -27,22 +27,18 @@ import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-@RunWith(Parameterized.class)
-public abstract class SparkCatalogTestBase extends SparkTestBase {
+public abstract class SparkTestBaseWithCatalog extends SparkTestBase {
private static File warehouse = null;
@BeforeClass
public static void createWarehouse() throws IOException {
- SparkCatalogTestBase.warehouse = File.createTempFile("warehouse", null);
+ SparkTestBaseWithCatalog.warehouse = File.createTempFile("warehouse", null);
Assert.assertTrue(warehouse.delete());
}
@@ -53,28 +49,6 @@ public abstract class SparkCatalogTestBase extends SparkTestBase {
}
}
- @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
- public static Object[][] parameters() {
- return new Object[][] {
- { "testhive", SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"
- ) },
- { "testhadoop", SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hadoop"
- ) },
- { "spark_catalog", SparkSessionCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default",
- "parquet-enabled", "true",
- "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
- ) }
- };
- }
-
@Rule
public TemporaryFolder temp = new TemporaryFolder();
@@ -84,7 +58,15 @@ public abstract class SparkCatalogTestBase extends SparkTestBase {
protected final TableIdentifier tableIdent = TableIdentifier.of(Namespace.of("default"), "table");
protected final String tableName;
- public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
+ public SparkTestBaseWithCatalog() {
+ this(SparkCatalogConfig.HADOOP);
+ }
+
+ public SparkTestBaseWithCatalog(SparkCatalogConfig config) {
+ this(config.catalogName(), config.implementation(), config.properties());
+ }
+
+ public SparkTestBaseWithCatalog(String catalogName, String implementation, Map<String, String> config) {
this.catalogName = catalogName;
this.validationCatalog = catalogName.equals("testhadoop") ?
new HadoopCatalog(spark.sessionState().newHadoopConf(), "file:" + warehouse) :