You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ja...@apache.org on 2021/12/14 23:37:37 UTC

[iceberg] branch master updated: Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)

This is an automated email from the ASF dual-hosted git repository.

jackye pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e8efde  Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)
2e8efde is described below

commit 2e8efde76171d0b56c6d0b209a997e4c176e7fc2
Author: Kyle Bendickson <kj...@gmail.com>
AuthorDate: Tue Dec 14 15:37:28 2021 -0800

    Spark: Backport SparkTestBaseWithCatalog to Spark 3.0 to simplify running tests with only one catalog (#3736)
---
 .../apache/iceberg/spark/SparkCatalogConfig.java   | 61 +++++++++++++++
 .../apache/iceberg/spark/SparkCatalogTestBase.java | 88 +++++-----------------
 ...TestBase.java => SparkTestBaseWithCatalog.java} | 40 +++-------
 3 files changed, 91 insertions(+), 98 deletions(-)

diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java
new file mode 100644
index 0000000..5d5dfeb
--- /dev/null
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.spark;
+
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+public enum SparkCatalogConfig {
+  HIVE("testhive", SparkCatalog.class.getName(), ImmutableMap.of(
+      "type", "hive",
+      "default-namespace", "default"
+  )),
+  HADOOP("testhadoop", SparkCatalog.class.getName(), ImmutableMap.of(
+      "type", "hadoop"
+  )),
+  SPARK("spark_catalog", SparkSessionCatalog.class.getName(), ImmutableMap.of(
+      "type", "hive",
+      "default-namespace", "default",
+      "parquet-enabled", "true",
+      "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
+  ));
+
+  private final String catalogName;
+  private final String implementation;
+  private final Map<String, String> properties;
+
+  SparkCatalogConfig(String catalogName, String implementation, Map<String, String> properties) {
+    this.catalogName = catalogName;
+    this.implementation = implementation;
+    this.properties = properties;
+  }
+
+  public String catalogName() {
+    return catalogName;
+  }
+
+  public String implementation() {
+    return implementation;
+  }
+
+  public Map<String, String> properties() {
+    return properties;
+  }
+}
diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
index 106ba12..774e813 100644
--- a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
@@ -19,91 +19,41 @@
 
 package org.apache.iceberg.spark;
 
-import java.io.File;
-import java.io.IOException;
 import java.util.Map;
-import org.apache.iceberg.catalog.Catalog;
-import org.apache.iceberg.catalog.Namespace;
-import org.apache.iceberg.catalog.SupportsNamespaces;
-import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.hadoop.HadoopCatalog;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
 @RunWith(Parameterized.class)
-public abstract class SparkCatalogTestBase extends SparkTestBase {
-  private static File warehouse = null;
-
-  @BeforeClass
-  public static void createWarehouse() throws IOException {
-    SparkCatalogTestBase.warehouse = File.createTempFile("warehouse", null);
-    Assert.assertTrue(warehouse.delete());
-  }
-
-  @AfterClass
-  public static void dropWarehouse() {
-    if (warehouse != null && warehouse.exists()) {
-      warehouse.delete();
-    }
-  }
+public abstract class SparkCatalogTestBase extends SparkTestBaseWithCatalog {
 
+  // these parameters are broken out to avoid changes that need to modify lots of test suites
   @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
   public static Object[][] parameters() {
-    return new Object[][] {
-        { "testhive", SparkCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hive",
-            "default-namespace", "default"
-         ) },
-        { "testhadoop", SparkCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hadoop"
-        ) },
-        { "spark_catalog", SparkSessionCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hive",
-            "default-namespace", "default",
-            "parquet-enabled", "true",
-            "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
-        ) }
-    };
+    return new Object[][] {{
+                               SparkCatalogConfig.HIVE.catalogName(),
+                               SparkCatalogConfig.HIVE.implementation(),
+                               SparkCatalogConfig.HIVE.properties()
+                           }, {
+                               SparkCatalogConfig.HADOOP.catalogName(),
+                               SparkCatalogConfig.HADOOP.implementation(),
+                               SparkCatalogConfig.HADOOP.properties()
+                           }, {
+                               SparkCatalogConfig.SPARK.catalogName(),
+                               SparkCatalogConfig.SPARK.implementation(),
+                               SparkCatalogConfig.SPARK.properties()
+                           }};
   }
 
   @Rule
   public TemporaryFolder temp = new TemporaryFolder();
 
-  protected final String catalogName;
-  protected final Catalog validationCatalog;
-  protected final SupportsNamespaces validationNamespaceCatalog;
-  protected final TableIdentifier tableIdent = TableIdentifier.of(Namespace.of("default"), "table");
-  protected final String tableName;
-
-  public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
-    this.catalogName = catalogName;
-    this.validationCatalog = catalogName.equals("testhadoop") ?
-        new HadoopCatalog(spark.sessionState().newHadoopConf(), "file:" + warehouse) :
-        catalog;
-    this.validationNamespaceCatalog = (SupportsNamespaces) validationCatalog;
-
-    spark.conf().set("spark.sql.catalog." + catalogName, implementation);
-    config.forEach((key, value) -> spark.conf().set("spark.sql.catalog." + catalogName + "." + key, value));
-
-    if (config.get("type").equalsIgnoreCase("hadoop")) {
-      spark.conf().set("spark.sql.catalog." + catalogName + ".warehouse", "file:" + warehouse);
-    }
-
-    this.tableName = (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default.table";
-
-    sql("CREATE NAMESPACE IF NOT EXISTS default");
+  public SparkCatalogTestBase(SparkCatalogConfig config) {
+    super(config);
   }
 
-  protected String tableName(String name) {
-    return (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default." + name;
+  public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
+    super(catalogName, implementation, config);
   }
 }
diff --git a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
similarity index 70%
copy from spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
copy to spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
index 106ba12..00dcd95 100644
--- a/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
+++ b/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
@@ -27,22 +27,18 @@ import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.SupportsNamespaces;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.hadoop.HadoopCatalog;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
 
-@RunWith(Parameterized.class)
-public abstract class SparkCatalogTestBase extends SparkTestBase {
+public abstract class SparkTestBaseWithCatalog extends SparkTestBase {
   private static File warehouse = null;
 
   @BeforeClass
   public static void createWarehouse() throws IOException {
-    SparkCatalogTestBase.warehouse = File.createTempFile("warehouse", null);
+    SparkTestBaseWithCatalog.warehouse = File.createTempFile("warehouse", null);
     Assert.assertTrue(warehouse.delete());
   }
 
@@ -53,28 +49,6 @@ public abstract class SparkCatalogTestBase extends SparkTestBase {
     }
   }
 
-  @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
-  public static Object[][] parameters() {
-    return new Object[][] {
-        { "testhive", SparkCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hive",
-            "default-namespace", "default"
-         ) },
-        { "testhadoop", SparkCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hadoop"
-        ) },
-        { "spark_catalog", SparkSessionCatalog.class.getName(),
-          ImmutableMap.of(
-            "type", "hive",
-            "default-namespace", "default",
-            "parquet-enabled", "true",
-            "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync
-        ) }
-    };
-  }
-
   @Rule
   public TemporaryFolder temp = new TemporaryFolder();
 
@@ -84,7 +58,15 @@ public abstract class SparkCatalogTestBase extends SparkTestBase {
   protected final TableIdentifier tableIdent = TableIdentifier.of(Namespace.of("default"), "table");
   protected final String tableName;
 
-  public SparkCatalogTestBase(String catalogName, String implementation, Map<String, String> config) {
+  public SparkTestBaseWithCatalog() {
+    this(SparkCatalogConfig.HADOOP);
+  }
+
+  public SparkTestBaseWithCatalog(SparkCatalogConfig config) {
+    this(config.catalogName(), config.implementation(), config.properties());
+  }
+
+  public SparkTestBaseWithCatalog(String catalogName, String implementation, Map<String, String> config) {
     this.catalogName = catalogName;
     this.validationCatalog = catalogName.equals("testhadoop") ?
         new HadoopCatalog(spark.sessionState().newHadoopConf(), "file:" + warehouse) :