You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@causeway.apache.org by ah...@apache.org on 2023/03/03 10:38:19 UTC

[causeway] branch master updated: CAUSEWAY-3304: adds md5 support to Blob and DataSource

This is an automated email from the ASF dual-hosted git repository.

ahuber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/causeway.git


The following commit(s) were added to refs/heads/master by this push:
     new f6c538656e CAUSEWAY-3304: adds md5 support to Blob and DataSource
f6c538656e is described below

commit f6c538656e398aa9c090a26378d3af87b507ee20
Author: Andi Huber <ah...@apache.org>
AuthorDate: Fri Mar 3 11:38:13 2023 +0100

    CAUSEWAY-3304: adds md5 support to Blob and DataSource
---
 .../org/apache/causeway/applib/value/Blob.java     |  18 +++
 .../causeway/commons/internal/base/_Bytes.java     |  42 ++++--
 .../org/apache/causeway/commons/io/DataSource.java |  17 +++
 .../org/apache/causeway/commons/io/HashUtils.java  | 158 +++++++++++++++++++++
 4 files changed, 227 insertions(+), 8 deletions(-)

diff --git a/api/applib/src/main/java/org/apache/causeway/applib/value/Blob.java b/api/applib/src/main/java/org/apache/causeway/applib/value/Blob.java
index 7706bb314d..62688c1791 100644
--- a/api/applib/src/main/java/org/apache/causeway/applib/value/Blob.java
+++ b/api/applib/src/main/java/org/apache/causeway/applib/value/Blob.java
@@ -44,6 +44,8 @@ import org.apache.causeway.commons.internal.base._Strings;
 import org.apache.causeway.commons.internal.exceptions._Exceptions;
 import org.apache.causeway.commons.internal.image._Images;
 import org.apache.causeway.commons.io.DataSource;
+import org.apache.causeway.commons.io.HashUtils;
+import org.apache.causeway.commons.io.HashUtils.HashAlgorithm;
 import org.apache.causeway.commons.io.ZipUtils;
 
 import lombok.NonNull;
@@ -270,6 +272,22 @@ public final class Blob implements NamedWithMimeType {
                       .unrecoverable("failed to unzip blob, no entry found %s", getName()));
     }
 
+    // -- HASHING
+
+    public Try<HashUtils.Hash> tryHash(final @NonNull HashAlgorithm hashAlgorithm) {
+        return HashUtils.tryDigest(hashAlgorithm, bytes, 4*1024); // last argument is the buffersize, 4k default
+    }
+
+    public Try<HashUtils.Hash> tryMd5() {
+        return tryHash(HashAlgorithm.MD5); // shortcut, same as tryHash with the MD5 algorithm
+    }
+
+    /** MD5 digest as hex string; {@code valueAsNonNullElseFail} presumably throws if there is none. */
+    public String md5Hex() {
+        final HashUtils.Hash md5 = tryMd5().valueAsNonNullElseFail();
+        return md5.asHexString();
+    }
+
     // -- OBJECT CONTRACT
 
     @Override
diff --git a/commons/src/main/java/org/apache/causeway/commons/internal/base/_Bytes.java b/commons/src/main/java/org/apache/causeway/commons/internal/base/_Bytes.java
index bcf1ca97a0..3007942f76 100644
--- a/commons/src/main/java/org/apache/causeway/commons/internal/base/_Bytes.java
+++ b/commons/src/main/java/org/apache/causeway/commons/internal/base/_Bytes.java
@@ -141,31 +141,57 @@ public final class _Bytes {
 
     // -- TO AND FROM HEX DUMP
 
+    // -- TO AND FROM HEX DUMP
+
     /**
-     * Converts given byte array into a space separated list of 2 character fixed length hex numbers.
+     * Converts given byte array into a delimiter separated list of 2 character fixed length (zero padded) hex numbers.
      * @apiNote future extensions may support pretty printing, but for now the resulting string is just a single line
-     * @see #ofHexDump(String)
+     * @see #ofHexDump(String, String)
      */
-    public static String hexDump(final @Nullable byte[] bytes) {
+    public static String hexDump(final @Nullable byte[] bytes, final @Nullable String delimiter) {
         if(bytes==null) {
             return "";
         }
-        return _Bytes.streamAsInts(bytes).mapToObj(Integer::toHexString).collect(Collectors.joining(" "));
+        return _Bytes.streamAsInts(bytes).mapToObj(b->String.format("%02x", b & 0xff)) // always 2 hex digits, e.g. 0a
+                .collect(Collectors.joining(_Strings.nullToEmpty(delimiter)));
     }
 
     /**
-     * Converts given space separated list of 2 character fixed length hex numbers into a byte array.
-     * @see #hexDump(byte[])
+     * Shortcut for {@code hexDump(bytes, " ")} using space as delimiter; yields an empty string for {@code null}.
+     * @see #hexDump(byte[], String)
      */
-    public static byte[] ofHexDump(final @Nullable String hexDump) {
+    public static String hexDump(final @Nullable byte[] bytes) {
+        return hexDump(bytes, " ");
+    }
+
+    /**
+     * Converts given delimiter separated list of 2 character fixed length hex numbers into a byte array.
+     * @see #hexDump(byte[], String)
+     */
+    public static byte[] ofHexDump(final @Nullable String hexDump, final @Nullable String delimiter) {
         if(hexDump==null) {
             return new byte[0];
         }
-        final IntStream intStream = _Strings.splitThenStream(hexDump, " ")
+        final int delimLen = _NullSafe.size(delimiter); // presumably 0 for null - confirm; delimiter characters are never inspected
+        final int stride = 2 + delimLen; // distance between the start indexes of two consecutive hex pairs
+
+        final IntStream intStream = IntStream.range(0, (hexDump.length() + delimLen)/stride) // complete pairs only, last one has no delimiter
+            .mapToObj(i->{
+                final int start = i * stride;
+                return hexDump.substring(start, start + 2);
+            })
             .mapToInt(hex->Integer.parseUnsignedInt(hex, 16));
         return ofIntStream(intStream);
     }
 
+    /**
+     * Shortcut for {@code ofHexDump(hexDump, " ")} using space as delimiter; yields an empty array for {@code null}.
+     * @see #ofHexDump(String, String)
+     */
+    public static byte[] ofHexDump(final @Nullable String hexDump) {
+        return ofHexDump(hexDump, " ");
+    }
+
     // -- PREPEND/APPEND
 
     /**
diff --git a/commons/src/main/java/org/apache/causeway/commons/io/DataSource.java b/commons/src/main/java/org/apache/causeway/commons/io/DataSource.java
index 8ddc3c3c0d..a3471f305c 100644
--- a/commons/src/main/java/org/apache/causeway/commons/io/DataSource.java
+++ b/commons/src/main/java/org/apache/causeway/commons/io/DataSource.java
@@ -42,6 +42,7 @@ import org.apache.causeway.commons.internal.base._Bytes;
 import org.apache.causeway.commons.internal.base._NullSafe;
 import org.apache.causeway.commons.internal.base._Strings;
 import org.apache.causeway.commons.internal.base._Text;
+import org.apache.causeway.commons.io.HashUtils.HashAlgorithm;
 
 import lombok.NonNull;
 import lombok.SneakyThrows;
@@ -143,6 +144,22 @@ public interface DataSource {
         return tryReadAndApply(ImageIO::read);
     }
 
+    // -- HASHING
+
+    default Try<HashUtils.Hash> tryHash(final @NonNull HashAlgorithm hashAlgorithm) {
+        return HashUtils.tryDigest(hashAlgorithm, this, 4*1024); // last argument is the buffersize, 4k default
+    }
+
+    default Try<HashUtils.Hash> tryMd5() {
+        return tryHash(HashAlgorithm.MD5); // shortcut, same as tryHash with the MD5 algorithm
+    }
+
+    /** MD5 digest as hex string; {@code valueAsNonNullElseFail} presumably throws if there is none. */
+    default String md5Hex() {
+        final HashUtils.Hash md5 = tryMd5().valueAsNonNullElseFail();
+        return md5.asHexString();
+    }
+
     // -- PIPE
 
     /**
diff --git a/commons/src/main/java/org/apache/causeway/commons/io/HashUtils.java b/commons/src/main/java/org/apache/causeway/commons/io/HashUtils.java
new file mode 100644
index 0000000000..accd4ee8bb
--- /dev/null
+++ b/commons/src/main/java/org/apache/causeway/commons/io/HashUtils.java
@@ -0,0 +1,158 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.causeway.commons.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+
+import org.springframework.lang.Nullable;
+
+import org.apache.causeway.commons.functional.ThrowingSupplier;
+import org.apache.causeway.commons.functional.Try;
+import org.apache.causeway.commons.internal.base._Bytes;
+import org.apache.causeway.commons.internal.base._NullSafe;
+
+import lombok.AccessLevel;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.Value;
+import lombok.val;
+import lombok.experimental.Accessors;
+import lombok.experimental.UtilityClass;
+
+/**
+ * Utilities related to byte data hashing algorithms, at least providing MD5.
+ * <p>
+ * Consider <a href="https://commons.apache.org/codec/">Apache Commons Codec</a>
+ * for a more comprehensive suite of digest utilities.
+ *
+ * @since 2.0 {@index}
+ */
+@UtilityClass
+public class HashUtils {
+
+    /** Digest bytes paired with the name of the algorithm that produced them. */
+    //XXX record candidate
+    @Value @Accessors(fluent=true)
+    public static class Hash {
+        private final String algorithmName;
+        private final byte[] bytes;
+
+        /** Hands out a defensive copy, so callers cannot alter the digest held by this instance. */
+        public byte[] bytes() {
+            return bytes!=null ? bytes.clone() : null;
+        }
+
+        /** Renders the digest bytes via {@link _Bytes#hexDump(byte[], String)} using an empty delimiter. */
+        public String asHexString() {
+            return _Bytes.hexDump(bytes, "");
+        }
+    }
+
+    /** Supported algorithms, each backed by a JDK {@link MessageDigest}. */
+    @RequiredArgsConstructor
+    public enum HashAlgorithm {
+        MD5(()->MessageDigest.getInstance("MD5")),
+        SHA1(()->MessageDigest.getInstance("SHA-1")),
+        ;
+
+        @Getter(value = AccessLevel.PRIVATE)
+        private final ThrowingSupplier<MessageDigest> messageDigestSupplier;
+
+        /** Looks up the {@link MessageDigest} for this algorithm, with the lookup wrapped in a {@link Try}. */
+        public Try<MessageDigest> tryGetMessageDigest(){
+            return Try.call(getMessageDigestSupplier()::get);
+        }
+    }
+
+    /**
+     * Optimized for when the input byte array is already present in memory.
+     * @param bytes input to digest; for {@code null} no digest is calculated
+     * @param buffersize not used by this variant, the array is digested in one go
+     */
+    public Try<Hash> tryDigest(
+            final @NonNull HashAlgorithm algorithm,
+            final @Nullable byte[] bytes,
+            final int buffersize) {
+        return tryDigestAsBytes(algorithm, bytes, buffersize)
+                .mapSuccessAsNullable(digestBytes -> new Hash(algorithm.name(), digestBytes));
+    }
+
+    /**
+     * Optimized for direct {@link InputStream} processing,
+     * if possible, not reading all data into memory at once.
+     * @param buffersize size of the read buffer in bytes, must be positive
+     */
+    public Try<Hash> tryDigest(
+            final @NonNull HashAlgorithm algorithm,
+            final @NonNull DataSource dataSource,
+            final int buffersize) {
+        return tryDigestAsBytes(algorithm, dataSource, buffersize)
+                .mapSuccessAsNullable(digestBytes -> new Hash(algorithm.name(), digestBytes));
+    }
+
+    // -- HELPER
+
+    /**
+     * Digests the array in one go; only {@code null} is passed through undigested,
+     * whereas an empty array yields the algorithm's regular digest of zero bytes.
+     */
+    private Try<byte[]> tryDigestAsBytes(
+            final HashAlgorithm algorithm,
+            final byte[] bytes,
+            final int buffersize) {
+
+        return Try.call(()->{
+            if(bytes==null) {
+                return null;
+            }
+            return algorithm.tryGetMessageDigest().valueAsNonNullElseFail().digest(bytes);
+        });
+    }
+
+    /** Feeds the stream through a {@link DigestInputStream} until exhausted; rejects a non-positive buffersize. */
+    private Try<byte[]> tryDigestAsBytes(
+            final HashAlgorithm algorithm,
+            final DataSource dataSource,
+            final int buffersize) {
+
+        return Try.call(()->{
+            if(buffersize<1) {
+                throw new IllegalArgumentException("buffersize must be positive, got " + buffersize);
+            }
+            val messageDigest = algorithm.tryGetMessageDigest().valueAsNonNullElseFail();
+            val digestBytes = dataSource.tryReadAndApply(inputStream->{
+                if(inputStream==null) {
+                    return null;
+                }
+                try(DigestInputStream dis = new DigestInputStream(inputStream, messageDigest)){
+                    final byte[] buffer = new byte[buffersize];
+                    while(dis.read(buffer)!=-1); // reading updates the digest as a side effect
+                }
+                return messageDigest.digest();
+            })
+            .valueAsNullableElseFail();
+            return digestBytes; // null-able
+        });
+    }
+
+}