You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/10/06 02:38:56 UTC

[iceberg] branch master updated: Core: Allow LocationProvider to be customized using reflection (#1531)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 61aa677  Core: Allow LocationProvider to be customized using reflection (#1531)
61aa677 is described below

commit 61aa67767511b9acf4534f4cffd795ae2d31ad99
Author: mickjermsurawong-stripe <42...@users.noreply.github.com>
AuthorDate: Mon Oct 5 19:38:48 2020 -0700

    Core: Allow LocationProvider to be customized using reflection (#1531)
---
 .../java/org/apache/iceberg/LocationProviders.java |  25 ++-
 .../java/org/apache/iceberg/TableProperties.java   |   2 +
 .../org/apache/iceberg/TestLocationProvider.java   | 215 +++++++++++++++++++++
 python/iceberg/core/table_properties.py            |   2 +
 site/docs/configuration.md                         |   1 +
 5 files changed, 244 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java
index 422b3ea..56dce3f 100644
--- a/core/src/main/java/org/apache/iceberg/LocationProviders.java
+++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java
@@ -21,6 +21,7 @@ package org.apache.iceberg;
 
 import java.util.Map;
 import org.apache.hadoop.fs.Path;
+import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.io.LocationProvider;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.transforms.Transform;
@@ -36,7 +37,29 @@ public class LocationProviders {
   }
 
   public static LocationProvider locationsFor(String location, Map<String, String> properties) {
-    if (PropertyUtil.propertyAsBoolean(properties,
+    if (properties.containsKey(TableProperties.WRITE_LOCATION_PROVIDER_IMPL)) {
+      String impl = properties.get(TableProperties.WRITE_LOCATION_PROVIDER_IMPL);
+      DynConstructors.Ctor<LocationProvider> ctor;
+      try {
+        ctor = DynConstructors.builder(LocationProvider.class)
+            .impl(impl, String.class, Map.class)
+            .impl(impl).buildChecked(); // fall back to no-arg constructor
+      } catch (NoSuchMethodException e) {
+        throw new IllegalArgumentException(String.format(
+            "Unable to find a constructor for implementation %s of %s. " +
+                "Make sure the implementation is in classpath, and that it either " +
+                "has a public no-arg constructor or a two-arg constructor " +
+                "taking in the string base table location and its property string map.",
+            impl, LocationProvider.class), e);
+      }
+      try {
+        return ctor.newInstance(location, properties);
+      } catch (ClassCastException e) {
+        throw new IllegalArgumentException(
+            String.format("Provided implementation for dynamic instantiation should implement %s.",
+                LocationProvider.class), e);
+      }
+    } else if (PropertyUtil.propertyAsBoolean(properties,
         TableProperties.OBJECT_STORE_ENABLED,
         TableProperties.OBJECT_STORE_ENABLED_DEFAULT)) {
       return new ObjectStoreLocationProvider(location, properties);
diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java
index 6193b0c..e356a6c 100644
--- a/core/src/main/java/org/apache/iceberg/TableProperties.java
+++ b/core/src/main/java/org/apache/iceberg/TableProperties.java
@@ -89,6 +89,8 @@ public class TableProperties {
 
   public static final String OBJECT_STORE_PATH = "write.object-storage.path";
 
+  public static final String WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl";
+
   // This only applies to files written after this property is set. Files previously written aren't
   // relocated to reflect this parameter.
   // If not set, defaults to a "data" folder underneath the root path of the table.
diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java
new file mode 100644
index 0000000..a07407f
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.util.Map;
+import org.apache.iceberg.io.LocationProvider;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestLocationProvider extends TableTestBase {
+  @Parameterized.Parameters
+  public static Object[][] parameters() {
+    return new Object[][] {
+        new Object[] { 1 },
+        new Object[] { 2 },
+    };
+  }
+
+  public TestLocationProvider(int formatVersion) {
+    super(formatVersion);
+  }
+
+  // publicly visible for testing to be dynamically loaded
+  public static class TwoArgDynamicallyLoadedLocationProvider implements LocationProvider {
+    String tableLocation;
+    Map<String, String> properties;
+
+    public TwoArgDynamicallyLoadedLocationProvider(String tableLocation, Map<String, String> properties) {
+      this.tableLocation = tableLocation;
+      this.properties = properties;
+    }
+
+    @Override
+    public String newDataLocation(String filename) {
+      return String.format("%s/test_custom_provider/%s", this.tableLocation, filename);
+    }
+
+    @Override
+    public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) {
+      throw new RuntimeException("Test custom provider does not expect any invocation");
+    }
+  }
+
+  // publicly visible for testing to be dynamically loaded
+  public static class NoArgDynamicallyLoadedLocationProvider implements LocationProvider {
+    // No-arg public constructor
+
+    @Override
+    public String newDataLocation(String filename) {
+      return String.format("test_no_arg_provider/%s", filename);
+    }
+
+    @Override
+    public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) {
+      throw new RuntimeException("Test custom provider does not expect any invocation");
+    }
+  }
+
+  // publicly visible for testing to be dynamically loaded
+  public static class InvalidArgTypesDynamicallyLoadedLocationProvider implements LocationProvider {
+
+    public InvalidArgTypesDynamicallyLoadedLocationProvider(Integer bogusArg1, String bogusArg2) {
+    }
+
+    @Override
+    public String newDataLocation(String filename) {
+      throw new RuntimeException("Invalid provider should have not been instantiated!");
+    }
+
+    @Override
+    public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) {
+      throw new RuntimeException("Invalid provider should have not been instantiated!");
+    }
+  }
+
+  // publicly visible for testing to be dynamically loaded
+  public static class InvalidNoInterfaceDynamicallyLoadedLocationProvider {
+    // Default no-arg constructor is present, but does not impelemnt interface LocationProvider
+  }
+
+  @Test
+  public void testDefaultLocationProvider() {
+    this.table.updateProperties()
+        .commit();
+
+    this.table.locationProvider().newDataLocation("my_file");
+    Assert.assertEquals(
+        "Default data path should have table location as root",
+        String.format("%s/data/%s", this.table.location(), "my_file"),
+        this.table.locationProvider().newDataLocation("my_file")
+    );
+  }
+
+  @Test
+  public void testDefaultLocationProviderWithCustomDataLocation() {
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_NEW_DATA_LOCATION, "new_location")
+        .commit();
+
+    this.table.locationProvider().newDataLocation("my_file");
+    Assert.assertEquals(
+        "Default location provider should allow custom path location",
+        "new_location/my_file",
+        this.table.locationProvider().newDataLocation("my_file")
+    );
+  }
+
+  @Test
+  public void testNoArgDynamicallyLoadedLocationProvider() {
+    String invalidImpl = String.format("%s$%s",
+        this.getClass().getCanonicalName(),
+        NoArgDynamicallyLoadedLocationProvider.class.getSimpleName());
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl)
+        .commit();
+
+    Assert.assertEquals(
+        "Custom provider should take base table location",
+        "test_no_arg_provider/my_file",
+        this.table.locationProvider().newDataLocation("my_file")
+    );
+  }
+
+  @Test
+  public void testTwoArgDynamicallyLoadedLocationProvider() {
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL,
+            String.format("%s$%s",
+                this.getClass().getCanonicalName(),
+                TwoArgDynamicallyLoadedLocationProvider.class.getSimpleName()))
+        .commit();
+
+    Assert.assertTrue(String.format("Table should load impl defined in its properties"),
+        this.table.locationProvider() instanceof TwoArgDynamicallyLoadedLocationProvider
+    );
+
+    Assert.assertEquals(
+        "Custom provider should take base table location",
+        String.format("%s/test_custom_provider/%s", this.table.location(), "my_file"),
+        this.table.locationProvider().newDataLocation("my_file")
+    );
+  }
+
+  @Test
+  public void testDynamicallyLoadedLocationProviderNotFound() {
+    String nonExistentImpl = String.format("%s$NonExistent%s",
+        this.getClass().getCanonicalName(),
+        TwoArgDynamicallyLoadedLocationProvider.class.getSimpleName());
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, nonExistentImpl)
+        .commit();
+
+    AssertHelpers.assertThrows("Non-existent implementation should fail on finding constructor",
+        IllegalArgumentException.class,
+        String.format("Unable to find a constructor for implementation %s of %s. ",
+            nonExistentImpl, LocationProvider.class),
+        () -> table.locationProvider()
+    );
+  }
+
+  @Test
+  public void testInvalidNoInterfaceDynamicallyLoadedLocationProvider() {
+    String invalidImpl = String.format("%s$%s",
+        this.getClass().getCanonicalName(),
+        InvalidNoInterfaceDynamicallyLoadedLocationProvider.class.getSimpleName());
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl)
+        .commit();
+
+    AssertHelpers.assertThrows(
+        "Class with missing interface implementation should fail on instantiation.",
+        IllegalArgumentException.class,
+        String.format("Provided implementation for dynamic instantiation should implement %s",
+            LocationProvider.class),
+        () -> table.locationProvider()
+    );
+  }
+
+  @Test
+  public void testInvalidArgTypesDynamicallyLoadedLocationProvider() {
+    String invalidImpl = String.format("%s$%s",
+        this.getClass().getCanonicalName(),
+        InvalidArgTypesDynamicallyLoadedLocationProvider.class.getSimpleName());
+    this.table.updateProperties()
+        .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl)
+        .commit();
+
+    AssertHelpers.assertThrows("Implementation with invalid arg types should fail on finding constructor",
+        IllegalArgumentException.class,
+        String.format("Unable to find a constructor for implementation %s of %s. ",
+            invalidImpl, LocationProvider.class),
+        () -> table.locationProvider()
+    );
+  }
+}
diff --git a/python/iceberg/core/table_properties.py b/python/iceberg/core/table_properties.py
index a50546c..da75eff 100644
--- a/python/iceberg/core/table_properties.py
+++ b/python/iceberg/core/table_properties.py
@@ -67,6 +67,8 @@ class TableProperties(object):
 
     OBJECT_STORE_PATH = "write.object-storage.path"
 
+    WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl"
+
     WRITE_NEW_DATA_LOCATION = "write.folder-storage.path"
 
     WRITE_METADATA_LOCATION = "write.metadata.path"
diff --git a/site/docs/configuration.md b/site/docs/configuration.md
index 2badc75..2229359 100644
--- a/site/docs/configuration.md
+++ b/site/docs/configuration.md
@@ -41,6 +41,7 @@ Iceberg tables support table properties to configure table behavior, like the de
 | write.parquet.compression-codec    | gzip               | Parquet compression codec                          |
 | write.parquet.compression-level    | null               | Parquet compression level                          |
 | write.avro.compression-codec       | gzip               | Avro compression codec                             |
+| write.location-provider.impl       | null               | Optional custom implemention for LocationProvider  |
 | write.metadata.compression-codec   | none               | Metadata compression codec; none or gzip           |
 | write.metadata.metrics.default     | truncate(16)       | Default metrics mode for all columns in the table; none, counts, truncate(length), or full |
 | write.metadata.metrics.column.col1 | (not set)          | Metrics mode for column 'col1' to allow per-column tuning; none, counts, truncate(length), or full |