You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2023/04/03 17:13:00 UTC

[iceberg] branch master updated: Core: Improve bit density in object storage layout (#7128)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new a4a07ba0cd Core: Improve bit density in object storage layout (#7128)
a4a07ba0cd is described below

commit a4a07ba0cd49d1679a76a79b468be11355a4f2c2
Author: Prashant Singh <35...@users.noreply.github.com>
AuthorDate: Mon Apr 3 10:12:52 2023 -0700

    Core: Improve bit density in object storage layout (#7128)
    
    Co-authored-by: Prashant Singh <ps...@amazon.com>
    Co-authored-by: Ryan Blue <bl...@apache.org>
---
 .../java/org/apache/iceberg/LocationProviders.java | 26 +++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java
index 61ad1c2a57..8a37b508eb 100644
--- a/core/src/main/java/org/apache/iceberg/LocationProviders.java
+++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java
@@ -18,14 +18,16 @@
  */
 package org.apache.iceberg;
 
+import java.nio.charset.StandardCharsets;
 import java.util.Map;
-import java.util.function.Function;
 import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.io.LocationProvider;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.transforms.Transforms;
-import org.apache.iceberg.types.Types;
+import org.apache.iceberg.relocated.com.google.common.hash.HashCode;
+import org.apache.iceberg.relocated.com.google.common.hash.HashFunction;
+import org.apache.iceberg.relocated.com.google.common.hash.Hashing;
+import org.apache.iceberg.relocated.com.google.common.io.BaseEncoding;
 import org.apache.iceberg.util.LocationUtil;
 import org.apache.iceberg.util.PropertyUtil;
 
@@ -104,9 +106,10 @@ public class LocationProviders {
   }
 
   static class ObjectStoreLocationProvider implements LocationProvider {
-    private static final Function<Object, Integer> HASH_FUNC =
-        Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get());
 
+    private static final HashFunction HASH_FUNC = Hashing.murmur3_32_fixed();
+    private static final BaseEncoding BASE64_ENCODER = BaseEncoding.base64Url().omitPadding();
+    private final ThreadLocal<byte[]> temp = ThreadLocal.withInitial(() -> new byte[4]);
     private final String storageLocation;
     private final String context;
 
@@ -143,11 +146,11 @@ public class LocationProviders {
 
     @Override
     public String newDataLocation(String filename) {
-      int hash = HASH_FUNC.apply(filename);
+      String hash = computeHash(filename);
       if (context != null) {
-        return String.format("%s/%08x/%s/%s", storageLocation, hash, context, filename);
+        return String.format("%s/%s/%s/%s", storageLocation, hash, context, filename);
       } else {
-        return String.format("%s/%08x/%s", storageLocation, hash, filename);
+        return String.format("%s/%s/%s", storageLocation, hash, filename);
       }
     }
 
@@ -167,5 +170,12 @@ public class LocationProviders {
 
       return resolvedContext;
     }
+
+    private String computeHash(String fileName) {
+      byte[] bytes = temp.get();
+      HashCode hash = HASH_FUNC.hashString(fileName, StandardCharsets.UTF_8);
+      hash.writeBytesTo(bytes, 0, 4);
+      return BASE64_ENCODER.encode(bytes);
+    }
   }
 }