You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2023/04/03 17:13:00 UTC
[iceberg] branch master updated: Core: Improve bit density in object storage layout (#7128)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new a4a07ba0cd Core: Improve bit density in object storage layout (#7128)
a4a07ba0cd is described below
commit a4a07ba0cd49d1679a76a79b468be11355a4f2c2
Author: Prashant Singh <35...@users.noreply.github.com>
AuthorDate: Mon Apr 3 10:12:52 2023 -0700
Core: Improve bit density in object storage layout (#7128)
Co-authored-by: Prashant Singh <ps...@amazon.com>
Co-authored-by: Ryan Blue <bl...@apache.org>
---
.../java/org/apache/iceberg/LocationProviders.java | 26 +++++++++++++++-------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java
index 61ad1c2a57..8a37b508eb 100644
--- a/core/src/main/java/org/apache/iceberg/LocationProviders.java
+++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java
@@ -18,14 +18,16 @@
*/
package org.apache.iceberg;
+import java.nio.charset.StandardCharsets;
import java.util.Map;
-import java.util.function.Function;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.common.DynConstructors;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.transforms.Transforms;
-import org.apache.iceberg.types.Types;
+import org.apache.iceberg.relocated.com.google.common.hash.HashCode;
+import org.apache.iceberg.relocated.com.google.common.hash.HashFunction;
+import org.apache.iceberg.relocated.com.google.common.hash.Hashing;
+import org.apache.iceberg.relocated.com.google.common.io.BaseEncoding;
import org.apache.iceberg.util.LocationUtil;
import org.apache.iceberg.util.PropertyUtil;
@@ -104,9 +106,10 @@ public class LocationProviders {
}
static class ObjectStoreLocationProvider implements LocationProvider {
- private static final Function<Object, Integer> HASH_FUNC =
- Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get());
+ private static final HashFunction HASH_FUNC = Hashing.murmur3_32_fixed(); // 32-bit murmur3 hash, applied to file names in computeHash
+ private static final BaseEncoding BASE64_ENCODER = BaseEncoding.base64Url().omitPadding(); // URL-safe base64 alphabet with '=' padding omitted
+ private final ThreadLocal<byte[]> temp = ThreadLocal.withInitial(() -> new byte[4]); // per-thread 4-byte scratch buffer; NOTE(review): instance field, neither static nor transient — confirm this provider is never Java-serialized
private final String storageLocation;
private final String context;
@@ -143,11 +146,11 @@ public class LocationProviders {
@Override
public String newDataLocation(String filename) {
- int hash = HASH_FUNC.apply(filename);
+ String hash = computeHash(filename); // hash path segment is the encoded string returned by computeHash
if (context != null) {
- return String.format("%s/%08x/%s/%s", storageLocation, hash, context, filename);
+ return String.format("%s/%s/%s/%s", storageLocation, hash, context, filename); // layout: <storageLocation>/<hash>/<context>/<filename>
} else {
- return String.format("%s/%08x/%s", storageLocation, hash, filename);
+ return String.format("%s/%s/%s", storageLocation, hash, filename); // layout: <storageLocation>/<hash>/<filename>
}
}
@@ -167,5 +170,12 @@ public class LocationProviders {
return resolvedContext;
}
+
+ private String computeHash(String fileName) { // returns the base64url (unpadded) text of the 4-byte hash of fileName
+ byte[] bytes = temp.get(); // reuse the calling thread's 4-byte buffer instead of allocating per call
+ HashCode hash = HASH_FUNC.hashString(fileName, StandardCharsets.UTF_8); // hash the name's UTF-8 bytes with HASH_FUNC
+ hash.writeBytesTo(bytes, 0, 4); // copy 4 hash bytes into the buffer starting at offset 0
+ return BASE64_ENCODER.encode(bytes); // 4 bytes encode to 6 characters when padding is omitted
+ }
}
}