Posted to commits@tika.apache.org by ta...@apache.org on 2017/06/05 18:16:26 UTC

[tika] branch master updated: TIKA-2386 -- enable more options for DigestingParser

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 5410928  TIKA-2386 -- enable more options for DigestingParser
5410928 is described below

commit 5410928d740baf6cdcf6ce225f105482f3a2fc11
Author: tballison <ta...@mitre.org>
AuthorDate: Mon Jun 5 14:16:16 2017 -0400

    TIKA-2386 -- enable more options for DigestingParser
---
 CHANGES.txt                                        |   5 +
 .../batch/builders/AppParserFactoryBuilder.java    |  15 +-
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  14 +-
 .../org/apache/tika/parser/DigestingParser.java    |  13 +-
 .../tika/parser/digest/CompositeDigester.java      |  56 ++++
 .../tika/parser/digest/InputStreamDigester.java    | 254 +++++++--------
 .../tika/parser/utils/BouncyCastleDigester.java    | 101 ++++++
 .../apache/tika/parser/utils/CommonsDigester.java  | 356 +++++++--------------
 ...t.java => BouncyCastleDigestingParserTest.java} | 186 ++++++-----
 .../apache/tika/parser/DigestingParserTest.java    |  54 ++--
 .../java/org/apache/tika/server/TikaServerCli.java |  15 +-
 .../java/org/apache/tika/server/CXFTestBase.java   |   2 +-
 .../org/apache/tika/server/TikaResourceTest.java   |   2 +
 13 files changed, 562 insertions(+), 511 deletions(-)
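
For reference, a minimal sketch of how the new string-based digester configuration introduced by this commit is meant to be used (constructor signatures taken from the diff below; the mark limit, file path, and algorithm spec "md5,sha256:32" are only illustrative):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.DigestingParser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.Parser;
    import org.apache.tika.parser.utils.CommonsDigester;
    import org.apache.tika.sax.BodyContentHandler;

    public class DigestExample {
        public static void main(String[] args) throws Exception {
            // "md5,sha256:32" = MD5 in hex (the default encoding) plus SHA-256 in base32
            Parser parser = new DigestingParser(new AutoDetectParser(),
                    new CommonsDigester(1_000_000, "md5,sha256:32"));
            Metadata metadata = new Metadata();
            try (InputStream is = Files.newInputStream(Paths.get(args[0]))) {
                parser.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
            }
            // Digest values land in the Metadata under keys built from
            // TikaCoreProperties.TIKA_META_PREFIX + "digest:" + algorithm, e.g. X-TIKA:digest:MD5
            for (String name : metadata.names()) {
                System.out.println(name + " = " + metadata.get(name));
            }
        }
    }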

diff --git a/CHANGES.txt b/CHANGES.txt
index a9ffd32..dabf119 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,8 @@
+Release 1.15.1 - ??/??/????
+
+  * Enable base32 encoding of digests and enable BouncyCastle implementations
+    of digest algorithms (TIKA-2386).
+
 Release 1.15 - 05/23/2017
 
   * Tika now has a module for Deep Learning powered by the 
diff --git a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
index 998f649..ec05a46 100644
--- a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import org.apache.tika.batch.DigestingAutoDetectParserFactory;
 import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.utils.BouncyCastleDigester;
 import org.apache.tika.parser.utils.CommonsDigester;
 import org.apache.tika.util.ClassLoaderUtil;
 import org.apache.tika.util.XMLDOMUtil;
@@ -55,8 +56,6 @@ public class AppParserFactoryBuilder implements IParserFactoryBuilder {
     }
 
     private DigestingParser.Digester buildDigester(Map<String, String> localAttrs) {
-        String digestString = localAttrs.get("digest");
-        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse(digestString);
 
         String readLimitString = localAttrs.get("digestMarkLimit");
         if (readLimitString == null) {
@@ -71,6 +70,16 @@ public class AppParserFactoryBuilder implements IParserFactoryBuilder {
             throw new IllegalArgumentException("Parameter \"digestMarkLimit\" must be a parseable int: "+
             readLimitString);
         }
-        return new CommonsDigester(readLimit, algos);
+        String digestString = localAttrs.get("digest");
+        try {
+            return new CommonsDigester(readLimit, digestString);
+        } catch (IllegalArgumentException commonsException) {
+            try {
+                return new BouncyCastleDigester(readLimit, digestString);
+            } catch (IllegalArgumentException bcException) {
+                throw new IllegalArgumentException("Tried both CommonsDigester ("+commonsException.getMessage()+
+                        ") and BouncyCastleDigester ("+bcException.getMessage()+")", bcException);
+            }
+        }
     }
 }
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 707037b..1c9f9ab 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -18,6 +18,11 @@ package org.apache.tika.cli;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TransformerHandler;
+import javax.xml.transform.stream.StreamResult;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -52,12 +57,6 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
 
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.sax.SAXTransformerFactory;
-import javax.xml.transform.sax.TransformerHandler;
-import javax.xml.transform.stream.StreamResult;
-
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.CloseShieldInputStream;
@@ -406,9 +405,8 @@ public class TikaCLI {
         } else if (arg.startsWith("--config=")) {
             configure(arg.substring("--config=".length()));
         } else if (arg.startsWith("--digest=")) {
-            CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse(
+            digester = new CommonsDigester(MAX_MARK,
                     arg.substring("--digest=".length()));
-            digester = new CommonsDigester(MAX_MARK,algos);
             parser = new DigestingParser(parser, digester);
         } else if (arg.startsWith("-e")) {
             encoding = arg.substring("-e".length());
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
index 08b028e..0e4c8c8 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
@@ -31,8 +31,8 @@ import org.xml.sax.SAXException;
 public class DigestingParser extends ParserDecorator {
 
     /**
-     * Interface for optional digester, if specified during construction.
-     * See org.apache.parser.utils.CommonsDigester in tika-parsers for an
+     * Interface for digester. See
+     * org.apache.parser.utils.CommonsDigester in tika-parsers for an
      * implementation.
      */
     public interface Digester {
@@ -53,10 +53,15 @@ public class DigestingParser extends ParserDecorator {
          * @throws IOException
          */
         void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException;
-
-
     };
 
+    /**
+     * Encodes byte array from a MessageDigest to String
+     */
+    public interface Encoder {
+        String encode(byte[] bytes);
+    }
+
     private final Digester digester;
     /**
      * Creates a decorator for the given parser.
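
The Encoder interface added above is deliberately small, so a custom encoding is a few lines. A hypothetical Base64 encoder, as a sketch only (this commit ships hex and base32 encoders, not base64):

    import org.apache.commons.codec.binary.Base64;

    import org.apache.tika.parser.DigestingParser;

    // Hypothetical example -- mirrors the hex/base32 encoders added elsewhere in this commit.
    public class Base64Encoder implements DigestingParser.Encoder {
        @Override
        public String encode(byte[] bytes) {
            return new Base64().encodeToString(bytes);
        }
    }
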
diff --git a/tika-core/src/main/java/org/apache/tika/parser/digest/CompositeDigester.java b/tika-core/src/main/java/org/apache/tika/parser/digest/CompositeDigester.java
new file mode 100644
index 0000000..f41d98b
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/digest/CompositeDigester.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.digest;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.ParseContext;
+
+
+public class CompositeDigester implements DigestingParser.Digester {
+
+    private final DigestingParser.Digester[] digesters;
+
+    public CompositeDigester(DigestingParser.Digester ... digesters) {
+        this.digesters = digesters;
+    }
+
+    @Override
+    public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
+        TemporaryResources tmp = new TemporaryResources();
+        TikaInputStream tis = TikaInputStream.get(is, tmp);
+        try {
+            for (DigestingParser.Digester digester : digesters) {
+                digester.digest(tis, m, parseContext);
+            }
+        } finally {
+            try {
+                tmp.dispose();
+            } catch (TikaException e) {
+                throw new IOExceptionWithCause(e);
+            }
+        }
+    }
+}
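
A minimal sketch of composing digesters by hand with the new class above, assuming the InputStreamDigester added later in this commit and commons-codec's Hex (the mark limit and algorithm choices are illustrative):

    import org.apache.commons.codec.binary.Hex;

    import org.apache.tika.parser.DigestingParser;
    import org.apache.tika.parser.digest.CompositeDigester;
    import org.apache.tika.parser.digest.InputStreamDigester;

    public class MultiDigestSketch {
        // One Digester that computes MD5 and SHA-256 over the same stream,
        // reporting both values in hex.
        public static DigestingParser.Digester build() {
            DigestingParser.Encoder hex = new DigestingParser.Encoder() {
                @Override
                public String encode(byte[] bytes) {
                    return Hex.encodeHexString(bytes);
                }
            };
            return new CompositeDigester(
                    new InputStreamDigester(1_000_000, "MD5", hex),
                    new InputStreamDigester(1_000_000, "SHA-256", "SHA256", hex));
        }
    }
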
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java b/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
similarity index 50%
copy from tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
copy to tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
index 846ab72..40a92a6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
@@ -1,5 +1,3 @@
-package org.apache.tika.parser.utils;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,17 +15,16 @@ package org.apache.tika.parser.utils;
  * limitations under the License.
  */
 
+package org.apache.tika.parser.digest;
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.security.Provider;
 
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.io.IOUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.io.TemporaryResources;
@@ -36,55 +33,80 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.DigestingParser;
 import org.apache.tika.parser.ParseContext;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Implementation of {@link org.apache.tika.parser.DigestingParser.Digester}
- * that relies on commons.codec.digest.DigestUtils to calculate digest hashes.
- * <p>
- * This digester tries to use the regular mark/reset protocol on the InputStream.
- * However, this wraps an internal BoundedInputStream, and if the InputStream
- * is not fully read, then this will reset the stream and
- * spool the InputStream to disk (via TikaInputStream) and then digest the file.
- * <p>
- * If a TikaInputStream is passed in and it has an underlying file that is longer
- * than the {@link #markLimit}, then this digester digests the file directly.
- */
-public class CommonsDigester implements DigestingParser.Digester {
 
-    private static final Logger LOG = LoggerFactory.getLogger(CommonsDigester.class);
+public class InputStreamDigester implements DigestingParser.Digester {
 
+    private final String algorithm;
+    private final String algorithmKeyName;
+    private final DigestingParser.Encoder encoder;
+    private final int markLimit;
 
-    public enum DigestAlgorithm {
-        //those currently available in commons.digest
-        MD2,
-        MD5,
-        SHA1,
-        SHA256,
-        SHA384,
-        SHA512;
-
-        String getMetadataKey() {
-            return TikaCoreProperties.TIKA_META_PREFIX +
-                    "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER + this.toString();
-        }
+    public InputStreamDigester(int markLimit, String algorithm,
+                               DigestingParser.Encoder encoder) {
+        this(markLimit, algorithm, algorithm, encoder);
     }
 
-    private final List<DigestAlgorithm> algorithms = new ArrayList<DigestAlgorithm>();
-    private final int markLimit;
+    /**
+     *
+     * @param markLimit limit in bytes to allow for mark/reset.  If the inputstream is longer
+     *                  than this limit, the stream will be reset and then spooled to a temporary file.
+     *                  Throws IllegalArgumentException if < 0.
+     * @param algorithm name of the digest algorithm to retrieve from the Provider
+     * @param algorithmKeyName name of the algorithm to store
+     *                         as part of the key in the metadata
+     *                         when {@link #digest(InputStream, Metadata, ParseContext)} is called
+     * @param encoder encoder to convert the byte array returned from the digester to a string
+     */
+    public InputStreamDigester(int markLimit, String algorithm, String algorithmKeyName,
+                               DigestingParser.Encoder encoder) {
+        this.algorithm = algorithm;
+        this.algorithmKeyName = algorithmKeyName;
+        this.encoder = encoder;
+        this.markLimit = markLimit;
 
-    public CommonsDigester(int markLimit, DigestAlgorithm... algorithms) {
-        Collections.addAll(this.algorithms, algorithms);
         if (markLimit < 0) {
             throw new IllegalArgumentException("markLimit must be >= 0");
         }
-        this.markLimit = markLimit;
     }
 
-    @Override
-    public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
+    private MessageDigest newMessageDigest() {
+        try {
+            Provider provider = getProvider();
+            if (provider == null) {
+                return MessageDigest.getInstance(algorithm);
+            } else {
+                return MessageDigest.getInstance(algorithm, provider);
+            }
+        } catch (NoSuchAlgorithmException e) {
+            throw new IllegalArgumentException(e);
+        }
+    }
 
+    /**
+     *
+     * When subclassing this, be careful to ensure that your provider is
+     * thread-safe (not likely) or return a new provider with each call.
+     *
+     *
+     * @return provider to use to get the MessageDigest from the algorithm name.
+     * Default is to return null.
+     */
+    protected Provider getProvider() {
+        return null;
+    }
+
+    /**
+     *
+     * @param is InputStream to digest. Best to use a TikaInputStream because
+     *           of potential need to spool to disk.  InputStream must
+     *           support mark/reset.
+     * @param metadata metadata in which to store the digest information
+     * @param parseContext ParseContext -- not actually used yet, but there for future expansion
+     * @throws IOException on IO problem or IllegalArgumentException if algorithm couldn't be found
+     */
+    @Override
+    public void digest(InputStream is, Metadata metadata,
+                       ParseContext parseContext) throws IOException {
         TikaInputStream tis = TikaInputStream.cast(is);
         if (tis != null && tis.hasFile()) {
             long sz = -1;
@@ -95,145 +117,97 @@ public class CommonsDigester implements DigestingParser.Digester {
             //and its size is greater than its mark limit,
             //just digest the underlying file.
             if (sz > markLimit) {
-                digestFile(tis.getFile(), m);
+                digestFile(tis.getFile(), metadata);
                 return;
             }
         }
 
+
         //try the usual mark/reset stuff.
         //however, if you actually hit the bound,
         //then stop and spool to file via TikaInputStream
         SimpleBoundedInputStream bis = new SimpleBoundedInputStream(markLimit, is);
         boolean finishedStream = false;
-        for (DigestAlgorithm algorithm : algorithms) {
-            bis.mark(markLimit + 1);
-            finishedStream = digestEach(algorithm, bis, m);
-            bis.reset();
-            if (!finishedStream) {
-                break;
-            }
+        bis.mark(markLimit + 1);
+        finishedStream = digestStream(bis, metadata);
+        bis.reset();
+        if (finishedStream) {
+            return;
         }
         //if the stream wasn't finished -- if the stream was longer than the mark limit --
         //spool to File and digest that.
-        if (!finishedStream) {
-            if (tis != null) {
-                digestFile(tis.getFile(), m);
-            } else {
-                TemporaryResources tmp = new TemporaryResources();
+        if (tis != null) {
+            digestFile(tis.getFile(), metadata);
+        } else {
+            TemporaryResources tmp = new TemporaryResources();
+            try {
+                TikaInputStream tmpTikaInputStream = TikaInputStream.get(is, tmp);
+                digestFile(tmpTikaInputStream.getFile(), metadata);
+            } finally {
                 try {
-                    TikaInputStream tmpTikaInputStream = TikaInputStream.get(is, tmp);
-                    digestFile(tmpTikaInputStream.getFile(), m);
-                } finally {
-                    try {
-                        tmp.dispose();
-                    } catch (TikaException e) {
-                        throw new IOExceptionWithCause(e);
-                    }
+                    tmp.dispose();
+                } catch (TikaException e) {
+                    throw new IOExceptionWithCause(e);
                 }
             }
         }
+    }
+
 
+    private String getMetadataKey() {
+        return TikaCoreProperties.TIKA_META_PREFIX +
+                "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER +
+                algorithmKeyName;
     }
 
     private void digestFile(File f, Metadata m) throws IOException {
-        for (DigestAlgorithm algorithm : algorithms) {
-            InputStream is = new FileInputStream(f);
-            try {
-                digestEach(algorithm, is, m);
-            } finally {
-                IOUtils.closeQuietly(is);
-            }
+        try (InputStream is = new FileInputStream(f)) {
+            digestStream(is, m);
         }
     }
 
     /**
-     * @param algorithm algo to use
-     * @param is        input stream to read from
-     * @param metadata  metadata for reporting the digest
+     * @param is       input stream to read from
+     * @param metadata metadata for reporting the digest
      * @return whether or not this finished the input stream
      * @throws IOException
      */
-    private boolean digestEach(DigestAlgorithm algorithm,
-                               InputStream is, Metadata metadata) throws IOException {
-        String digest = null;
-        try {
-            switch (algorithm) {
-                case MD2:
-                    digest = DigestUtils.md2Hex(is);
-                    break;
-                case MD5:
-                    digest = DigestUtils.md5Hex(is);
-                    break;
-                case SHA1:
-                    digest = DigestUtils.sha1Hex(is);
-                    break;
-                case SHA256:
-                    digest = DigestUtils.sha256Hex(is);
-                    break;
-                case SHA384:
-                    digest = DigestUtils.sha384Hex(is);
-                    break;
-                case SHA512:
-                    digest = DigestUtils.sha512Hex(is);
-                    break;
-                default:
-                    throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algorithm.toString());
-            }
-        } catch (IOException e) {
-            LOG.warn("Problem digesting", e);
-            //swallow, or should we throw this?
-        }
+    private boolean digestStream(InputStream is, Metadata metadata) throws IOException {
+        byte[] digestBytes;
+        MessageDigest messageDigest = newMessageDigest();
+
+        updateDigest(messageDigest, is);
+        digestBytes = messageDigest.digest();
+
         if (is instanceof SimpleBoundedInputStream) {
             if (((SimpleBoundedInputStream) is).hasHitBound()) {
                 return false;
             }
         }
-        metadata.set(algorithm.getMetadataKey(), digest);
+        metadata.set(getMetadataKey(), encoder.encode(digestBytes));
         return true;
     }
 
+
     /**
-     * @param s comma-delimited (no space) list of algorithms to use: md5,sha256
-     * @return
+     * Copied from commons-codec
      */
-    public static DigestAlgorithm[] parse(String s) {
-        assert (s != null);
-
-        List<DigestAlgorithm> ret = new ArrayList<>();
-        for (String algoString : s.split(",")) {
-            String uc = algoString.toUpperCase(Locale.ROOT);
-            if (uc.equals(DigestAlgorithm.MD2.toString())) {
-                ret.add(DigestAlgorithm.MD2);
-            } else if (uc.equals(DigestAlgorithm.MD5.toString())) {
-                ret.add(DigestAlgorithm.MD5);
-            } else if (uc.equals(DigestAlgorithm.SHA1.toString())) {
-                ret.add(DigestAlgorithm.SHA1);
-            } else if (uc.equals(DigestAlgorithm.SHA256.toString())) {
-                ret.add(DigestAlgorithm.SHA256);
-            } else if (uc.equals(DigestAlgorithm.SHA384.toString())) {
-                ret.add(DigestAlgorithm.SHA384);
-            } else if (uc.equals(DigestAlgorithm.SHA512.toString())) {
-                ret.add(DigestAlgorithm.SHA512);
-            } else {
-                StringBuilder sb = new StringBuilder();
-                int i = 0;
-                for (DigestAlgorithm algo : DigestAlgorithm.values()) {
-                    if (i++ > 0) {
-                        sb.append(", ");
-                    }
-                    sb.append(algo.toString());
-                }
-                throw new IllegalArgumentException("Couldn't match " + s + " with any of: " + sb.toString());
-            }
+    private static MessageDigest updateDigest(MessageDigest digest, InputStream data) throws IOException {
+        byte[] buffer = new byte[1024];
+
+        for (int read = data.read(buffer, 0, 1024); read > -1; read = data.read(buffer, 0, 1024)) {
+            digest.update(buffer, 0, read);
         }
-        return ret.toArray(new DigestAlgorithm[ret.size()]);
+
+        return digest;
     }
 
+
     /**
      * Very slight modification of Commons' BoundedInputStream
      * so that we can figure out if this hit the bound or not.
      */
-    private class SimpleBoundedInputStream extends InputStream {
+    private static class SimpleBoundedInputStream extends InputStream {
         private final static int EOF = -1;
         private final long max;
         private final InputStream in;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/BouncyCastleDigester.java b/tika-parsers/src/main/java/org/apache/tika/parser/utils/BouncyCastleDigester.java
new file mode 100644
index 0000000..2b6529c
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/utils/BouncyCastleDigester.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.utils;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.security.Provider;
+
+import org.apache.commons.codec.binary.Base32;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.digest.CompositeDigester;
+import org.apache.tika.parser.digest.InputStreamDigester;
+import org.bouncycastle.jce.provider.BouncyCastleProvider;
+import org.bouncycastle.util.encoders.Hex;
+
+/**
+ * Digester that relies on BouncyCastle for MessageDigest implementations.
+ *
+ */
+public class BouncyCastleDigester extends CompositeDigester {
+
+    /**
+     * Include a string representing the comma-separated algorithms to run: e.g. "md5,sha1".
+     * If you want base 32 encoding instead of hexadecimal, add ":32" to the algorithm, e.g. "md5,sha1:32"
+     * <p/>
+     * Will throw an IllegalArgumentException if an algorithm isn't supported
+     * @param markLimit
+     * @param algorithmString
+     */
+    public BouncyCastleDigester(int markLimit, String algorithmString) {
+        super(buildDigesters(markLimit, algorithmString));
+    }
+
+    private static DigestingParser.Digester[] buildDigesters(int markLimit, String digesterDef) {
+        String[] digests = digesterDef.split(",");
+        DigestingParser.Digester[] digesters = new DigestingParser.Digester[digests.length];
+        int i = 0;
+        for (String digest : digests) {
+            String[] parts = digest.split(":");
+            DigestingParser.Encoder encoder = null;
+            if (parts.length > 1) {
+                if (parts[1].equals("16")) {
+                    encoder = new HexEncoder();
+                } else if (parts[1].equals("32")) {
+                    encoder = new Base32Encoder();
+                } else {
+                    throw new IllegalArgumentException("Value must be '16' or '32'");
+                }
+            } else {
+                encoder = new HexEncoder();
+            }
+            digesters[i++] = new BCInputStreamDigester(markLimit, parts[0], encoder);
+        }
+        return digesters;
+    }
+
+    private static class HexEncoder implements DigestingParser.Encoder {
+        @Override
+        public String encode(byte[] bytes) {
+            return Hex.toHexString(bytes);
+        }
+    }
+
+    private static class Base32Encoder implements DigestingParser.Encoder {
+        @Override
+        public String encode(byte[] bytes) {
+            return new Base32().encodeToString(bytes);
+        }
+    }
+
+    private static class BCInputStreamDigester extends InputStreamDigester {
+
+        public BCInputStreamDigester(int markLimit, String algorithm, DigestingParser.Encoder encoder) {
+            super(markLimit, algorithm, encoder);
+            try {
+                MessageDigest.getInstance(algorithm, getProvider());
+            } catch (NoSuchAlgorithmException e) {
+                throw new IllegalArgumentException(e);
+            }
+        }
+
+        @Override
+        protected Provider getProvider() {
+            return new BouncyCastleProvider();
+        }
+    }
+}
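
Because the BouncyCastle provider registers algorithms that commons-codec never offered (the new test below exercises SHA3-512), a short sketch of requesting one of them plus a base32-encoded SHA-1 is shown here; the mark limit is illustrative:

    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.DigestingParser;
    import org.apache.tika.parser.Parser;
    import org.apache.tika.parser.utils.BouncyCastleDigester;

    public class BouncyCastleDigestSketch {
        // SHA3-512 in hex (the default) plus SHA-1 encoded as base32,
        // matching the algorithm string used in the new test.
        public static Parser sha3Parser() {
            return new DigestingParser(new AutoDetectParser(),
                    new BouncyCastleDigester(1_000_000, "SHA3-512,SHA1:32"));
        }
    }
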
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
index 846ab72..a467651 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
@@ -1,5 +1,3 @@
-package org.apache.tika.parser.utils;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,28 +14,19 @@ package org.apache.tika.parser.utils;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.tika.parser.utils;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOExceptionWithCause;
-import org.apache.tika.io.TemporaryResources;
-import org.apache.tika.io.TikaInputStream;
+import org.apache.commons.codec.binary.Base32;
+import org.apache.commons.codec.binary.Hex;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.ParseContext;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.tika.parser.digest.CompositeDigester;
+import org.apache.tika.parser.digest.InputStreamDigester;
 
 /**
  * Implementation of {@link org.apache.tika.parser.DigestingParser.Digester}
@@ -47,280 +36,151 @@ import org.slf4j.LoggerFactory;
  * However, this wraps an internal BoundedInputStream, and if the InputStream
  * is not fully read, then this will reset the stream and
  * spool the InputStream to disk (via TikaInputStream) and then digest the file.
- * <p>
- * If a TikaInputStream is passed in and it has an underlying file that is longer
- * than the {@link #markLimit}, then this digester digests the file directly.
  */
-public class CommonsDigester implements DigestingParser.Digester {
-
-    private static final Logger LOG = LoggerFactory.getLogger(CommonsDigester.class);
-
+public class CommonsDigester extends CompositeDigester {
 
     public enum DigestAlgorithm {
         //those currently available in commons.digest
-        MD2,
-        MD5,
-        SHA1,
-        SHA256,
-        SHA384,
-        SHA512;
+        MD2("MD2"),
+        MD5("MD5"),
+        SHA1("SHA-1"),
+        SHA256("SHA-256"),
+        SHA384("SHA-384"),
+        SHA512("SHA-512");
 
-        String getMetadataKey() {
-            return TikaCoreProperties.TIKA_META_PREFIX +
-                    "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER + this.toString();
-        }
-    }
+        private final String javaName;
 
-    private final List<DigestAlgorithm> algorithms = new ArrayList<DigestAlgorithm>();
-    private final int markLimit;
-
-    public CommonsDigester(int markLimit, DigestAlgorithm... algorithms) {
-        Collections.addAll(this.algorithms, algorithms);
-        if (markLimit < 0) {
-            throw new IllegalArgumentException("markLimit must be >= 0");
+        DigestAlgorithm(String javaName) {
+            this.javaName = javaName;
         }
-        this.markLimit = markLimit;
-    }
-
-    @Override
-    public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
 
-        TikaInputStream tis = TikaInputStream.cast(is);
-        if (tis != null && tis.hasFile()) {
-            long sz = -1;
-            if (tis.hasFile()) {
-                sz = tis.getLength();
-            }
-            //if the inputstream has a file,
-            //and its size is greater than its mark limit,
-            //just digest the underlying file.
-            if (sz > markLimit) {
-                digestFile(tis.getFile(), m);
-                return;
-            }
+        String getJavaName() {
+            return javaName;
         }
-
-        //try the usual mark/reset stuff.
-        //however, if you actually hit the bound,
-        //then stop and spool to file via TikaInputStream
-        SimpleBoundedInputStream bis = new SimpleBoundedInputStream(markLimit, is);
-        boolean finishedStream = false;
-        for (DigestAlgorithm algorithm : algorithms) {
-            bis.mark(markLimit + 1);
-            finishedStream = digestEach(algorithm, bis, m);
-            bis.reset();
-            if (!finishedStream) {
-                break;
-            }
-        }
-        //if the stream wasn't finished -- if the stream was longer than the mark limit --
-        //spool to File and digest that.
-        if (!finishedStream) {
-            if (tis != null) {
-                digestFile(tis.getFile(), m);
-            } else {
-                TemporaryResources tmp = new TemporaryResources();
-                try {
-                    TikaInputStream tmpTikaInputStream = TikaInputStream.get(is, tmp);
-                    digestFile(tmpTikaInputStream.getFile(), m);
-                } finally {
-                    try {
-                        tmp.dispose();
-                    } catch (TikaException e) {
-                        throw new IOExceptionWithCause(e);
-                    }
-                }
-            }
+        String getMetadataKey() {
+            return TikaCoreProperties.TIKA_META_PREFIX +
+                    "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER + this.toString();
         }
-
     }
 
-    private void digestFile(File f, Metadata m) throws IOException {
-        for (DigestAlgorithm algorithm : algorithms) {
-            InputStream is = new FileInputStream(f);
-            try {
-                digestEach(algorithm, is, m);
-            } finally {
-                IOUtils.closeQuietly(is);
-            }
-        }
+    /**
+     * Include a string representing the comma-separated algorithms to run: e.g. "md5,sha1".
+     * If you want base 32 encoding instead of hexadecimal, add ":32" to the algorithm, e.g. "md5,sha1:32"
+     * <p/>
+     * Will throw an IllegalArgumentException if an algorithm isn't supported
+     * @param markLimit
+     * @param algorithmString
+     */
+    public CommonsDigester(int markLimit, String algorithmString) {
+        super(buildDigesters(markLimit, algorithmString));
     }
 
     /**
-     * @param algorithm algo to use
-     * @param is        input stream to read from
-     * @param metadata  metadata for reporting the digest
-     * @return whether or not this finished the input stream
-     * @throws IOException
+     *
+     * @param markLimit limit for mark/reset; after this limit is hit, the
+     *                  stream is reset and spooled to disk
+     * @param algorithms algorithms to run
+     * @deprecated use {@link #CommonsDigester(int, String)}
      */
-    private boolean digestEach(DigestAlgorithm algorithm,
-                               InputStream is, Metadata metadata) throws IOException {
-        String digest = null;
-        try {
-            switch (algorithm) {
-                case MD2:
-                    digest = DigestUtils.md2Hex(is);
-                    break;
-                case MD5:
-                    digest = DigestUtils.md5Hex(is);
-                    break;
-                case SHA1:
-                    digest = DigestUtils.sha1Hex(is);
-                    break;
-                case SHA256:
-                    digest = DigestUtils.sha256Hex(is);
-                    break;
-                case SHA384:
-                    digest = DigestUtils.sha384Hex(is);
-                    break;
-                case SHA512:
-                    digest = DigestUtils.sha512Hex(is);
-                    break;
-                default:
-                    throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algorithm.toString());
-            }
-        } catch (IOException e) {
-            LOG.warn("Problem digesting", e);
-            //swallow, or should we throw this?
-        }
-        if (is instanceof SimpleBoundedInputStream) {
-            if (((SimpleBoundedInputStream) is).hasHitBound()) {
-                return false;
-            }
+    public CommonsDigester(int markLimit, DigestAlgorithm... algorithms) {
+        super(buildDigesters(markLimit, algorithms));
+    }
+
+    private static DigestingParser.Digester[] buildDigesters(int markLimit, DigestAlgorithm[] algorithms) {
+        DigestingParser.Digester[] digesters = new DigestingParser.Digester[algorithms.length];
+        int i = 0;
+        for (DigestAlgorithm algorithm : algorithms) {
+            digesters[i++] = new InputStreamDigester(markLimit, algorithm.getJavaName(), algorithm.name(),
+                    new HexEncoder());
         }
-        metadata.set(algorithm.getMetadataKey(), digest);
-        return true;
+        return digesters;
     }
 
     /**
-     * @param s comma-delimited (no space) list of algorithms to use: md5,sha256
+     * This returns digest algorithms only.  It does not understand the encoding
+     * syntax, e.g. "MD5:32" (base 32 encoding of MD5).  To parse
+     * those, see {@link #CommonsDigester(int, String)}.
+     *
+     * @deprecated use the {@link #CommonsDigester(int, String)} instead
+     * @param s comma-delimited (no space) list of algorithms to use: md5,sha256.
      * @return
+     *
      */
+    @Deprecated
     public static DigestAlgorithm[] parse(String s) {
         assert (s != null);
 
         List<DigestAlgorithm> ret = new ArrayList<>();
         for (String algoString : s.split(",")) {
-            String uc = algoString.toUpperCase(Locale.ROOT);
-            if (uc.equals(DigestAlgorithm.MD2.toString())) {
-                ret.add(DigestAlgorithm.MD2);
-            } else if (uc.equals(DigestAlgorithm.MD5.toString())) {
-                ret.add(DigestAlgorithm.MD5);
-            } else if (uc.equals(DigestAlgorithm.SHA1.toString())) {
-                ret.add(DigestAlgorithm.SHA1);
-            } else if (uc.equals(DigestAlgorithm.SHA256.toString())) {
-                ret.add(DigestAlgorithm.SHA256);
-            } else if (uc.equals(DigestAlgorithm.SHA384.toString())) {
-                ret.add(DigestAlgorithm.SHA384);
-            } else if (uc.equals(DigestAlgorithm.SHA512.toString())) {
-                ret.add(DigestAlgorithm.SHA512);
-            } else {
-                StringBuilder sb = new StringBuilder();
-                int i = 0;
-                for (DigestAlgorithm algo : DigestAlgorithm.values()) {
-                    if (i++ > 0) {
-                        sb.append(", ");
-                    }
-                    sb.append(algo.toString());
-                }
-                throw new IllegalArgumentException("Couldn't match " + s + " with any of: " + sb.toString());
-            }
+            ret.add(getDigestAlgorithm(algoString));
         }
         return ret.toArray(new DigestAlgorithm[ret.size()]);
     }
 
-    /**
-     * Very slight modification of Commons' BoundedInputStream
-     * so that we can figure out if this hit the bound or not.
-     */
-    private class SimpleBoundedInputStream extends InputStream {
-        private final static int EOF = -1;
-        private final long max;
-        private final InputStream in;
-        private long pos;
-
-        private SimpleBoundedInputStream(long max, InputStream in) {
-            this.max = max;
-            this.in = in;
-        }
-
-        @Override
-        public int read() throws IOException {
-            if (max >= 0 && pos >= max) {
-                return EOF;
+    private static DigestAlgorithm getDigestAlgorithm(String algoString) {
+        String uc = algoString.toUpperCase(Locale.ROOT);
+        if (uc.equals(DigestAlgorithm.MD2.toString())) {
+            return DigestAlgorithm.MD2;
+        } else if (uc.equals(DigestAlgorithm.MD5.toString())) {
+            return DigestAlgorithm.MD5;
+        } else if (uc.equals(DigestAlgorithm.SHA1.toString())) {
+            return DigestAlgorithm.SHA1;
+        } else if (uc.equals(DigestAlgorithm.SHA256.toString())) {
+            return DigestAlgorithm.SHA256;
+        } else if (uc.equals(DigestAlgorithm.SHA384.toString())) {
+            return DigestAlgorithm.SHA384;
+        } else if (uc.equals(DigestAlgorithm.SHA512.toString())) {
+            return DigestAlgorithm.SHA512;
+        } else {
+            StringBuilder sb = new StringBuilder();
+            int i = 0;
+            for (DigestAlgorithm algo : DigestAlgorithm.values()) {
+                if (i++ > 0) {
+                    sb.append(", ");
+                }
+                sb.append(algo.toString());
             }
-            final int result = in.read();
-            pos++;
-            return result;
+            throw new IllegalArgumentException("Couldn't match " + algoString + " with any of: " + sb.toString());
         }
+    }
 
-        /**
-         * Invokes the delegate's <code>read(byte[])</code> method.
-         *
-         * @param b the buffer to read the bytes into
-         * @return the number of bytes read or -1 if the end of stream or
-         * the limit has been reached.
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public int read(final byte[] b) throws IOException {
-            return this.read(b, 0, b.length);
-        }
-
-        /**
-         * Invokes the delegate's <code>read(byte[], int, int)</code> method.
-         *
-         * @param b   the buffer to read the bytes into
-         * @param off The start offset
-         * @param len The number of bytes to read
-         * @return the number of bytes read or -1 if the end of stream or
-         * the limit has been reached.
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public int read(final byte[] b, final int off, final int len) throws IOException {
-            if (max >= 0 && pos >= max) {
-                return EOF;
-            }
-            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
-            final int bytesRead = in.read(b, off, (int) maxRead);
-
-            if (bytesRead == EOF) {
-                return EOF;
+    private static DigestingParser.Digester[] buildDigesters(int markLimit, String digesterDef) {
+        String[] digests = digesterDef.split(",");
+        DigestingParser.Digester[] digesters = new DigestingParser.Digester[digests.length];
+        int i = 0;
+        for (String digest : digests) {
+            String[] parts = digest.split(":");
+            DigestingParser.Encoder encoder = null;
+            if (parts.length > 1) {
+                if (parts[1].equals("16")) {
+                    encoder = new HexEncoder();
+                } else if (parts[1].equals("32")) {
+                    encoder = new Base32Encoder();
+                } else {
+                    throw new IllegalArgumentException("Value must be '16' or '32'");
+                }
+            } else {
+                encoder = new HexEncoder();
             }
-
-            pos += bytesRead;
-            return bytesRead;
+            DigestAlgorithm digestAlgorithm = getDigestAlgorithm(parts[0]);
+            digesters[i++] = new InputStreamDigester(markLimit, digestAlgorithm.getJavaName(),
+                    digestAlgorithm.name(), encoder);
         }
+        return digesters;
+    }
 
-        /**
-         * Invokes the delegate's <code>skip(long)</code> method.
-         *
-         * @param n the number of bytes to skip
-         * @return the actual number of bytes skipped
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public long skip(final long n) throws IOException {
-            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
-            final long skippedBytes = in.skip(toSkip);
-            pos += skippedBytes;
-            return skippedBytes;
-        }
 
+    private static class HexEncoder implements DigestingParser.Encoder {
         @Override
-        public void reset() throws IOException {
-            in.reset();
-            pos = 0;
+        public String encode(byte[] bytes) {
+            return Hex.encodeHexString(bytes);
         }
+    }
 
+    private static class Base32Encoder implements DigestingParser.Encoder {
         @Override
-        public void mark(int readLimit) {
-            in.mark(readLimit);
-        }
-
-        public boolean hasHitBound() {
-            return pos >= max;
+        public String encode(byte[] bytes) {
+            return new Base32().encodeToString(bytes);
         }
     }
 }
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
similarity index 52%
copy from tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
copy to tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
index 8b198a3..1d2861b 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
@@ -19,7 +19,6 @@ package org.apache.tika.parser;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
@@ -34,15 +33,16 @@ import java.util.Map;
 import java.util.Random;
 
 import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.tika.TikaTest;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.utils.BouncyCastleDigester;
 import org.junit.Test;
 
 
-public class DigestingParserTest extends TikaTest {
+public class BouncyCastleDigestingParserTest extends TikaTest {
 
     private final static String P = TikaCoreProperties.TIKA_META_PREFIX+
             "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
@@ -56,46 +56,69 @@ public class DigestingParserTest extends TikaTest {
 
     @Test
     public void testBasic() throws Exception {
-        Map<CommonsDigester.DigestAlgorithm, String> expected =
+        Map<String, String> expected =
                 new HashMap<>();
 
-        expected.put(CommonsDigester.DigestAlgorithm.MD2,"d768c8e27b0b52c6eaabfaa7122d1d4f");
-        expected.put(CommonsDigester.DigestAlgorithm.MD5,"59f626e09a8c16ab6dbc2800c685f772");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA1,"7a1f001d163ac90d8ea54c050faf5a38079788a6");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA256,"c4b7fab030a8b6a9d6691f6699ac8e6f" +
-                                                            "82bc53764a0f1430d134ae3b70c32654");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA384,"ebe368b9326fef44408290724d187553"+
-                                                            "8b8a6923fdf251ddab72c6e4b5d54160" +
-                                                            "9db917ba4260d1767995a844d8d654df");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA512,"ee46d973ee1852c018580c242955974d"+
-                                                            "da4c21f36b54d7acd06fcf68e974663b"+
-                                                            "fed1d256875be58d22beacf178154cc3"+
-                                                            "a1178cb73443deaa53aa0840324708bb");
+        expected.put("MD2", "d768c8e27b0b52c6eaabfaa7122d1d4f");
+        expected.put("MD5", "59f626e09a8c16ab6dbc2800c685f772");
+        expected.put("SHA1", "7a1f001d163ac90d8ea54c050faf5a38079788a6");
+        expected.put("SHA256", "c4b7fab030a8b6a9d6691f6699ac8e6f" +
+                "82bc53764a0f1430d134ae3b70c32654");
+        expected.put("SHA384", "ebe368b9326fef44408290724d187553" +
+                "8b8a6923fdf251ddab72c6e4b5d54160" +
+                "9db917ba4260d1767995a844d8d654df");
+        expected.put("SHA512", "ee46d973ee1852c018580c242955974d" +
+                "da4c21f36b54d7acd06fcf68e974663b" +
+                "fed1d256875be58d22beacf178154cc3" +
+                "a1178cb73443deaa53aa0840324708bb");
 
         //test each one
-        for (CommonsDigester.DigestAlgorithm algo : CommonsDigester.DigestAlgorithm.values()) {
+        for (String algo : expected.keySet()) {
             Metadata m = new Metadata();
             XMLResult xml = getXML("test_recursive_embedded.docx",
-                    new DigestingParser(p, new CommonsDigester(UNLIMITED, algo)), m);
-            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
+                    new DigestingParser(p, new BouncyCastleDigester(UNLIMITED, algo)), m);
+            assertEquals(algo, expected.get(algo), m.get(P + algo));
         }
 
+    }
+
+    @Test
+    public void testCommaSeparated() throws Exception {
+        Map<String, String> expected =
+                new HashMap<>();
 
+        expected.put("MD2", "d768c8e27b0b52c6eaabfaa7122d1d4f");
+        expected.put("MD5", "59f626e09a8c16ab6dbc2800c685f772");
+        expected.put("SHA1", "7a1f001d163ac90d8ea54c050faf5a38079788a6");
+        expected.put("SHA256", "c4b7fab030a8b6a9d6691f6699ac8e6f" +
+                "82bc53764a0f1430d134ae3b70c32654");
+        expected.put("SHA384", "ebe368b9326fef44408290724d187553" +
+                "8b8a6923fdf251ddab72c6e4b5d54160" +
+                "9db917ba4260d1767995a844d8d654df");
+        expected.put("SHA512",
+                "ee46d973ee1852c018580c242955974d" +
+                "da4c21f36b54d7acd06fcf68e974663b" +
+                "fed1d256875be58d22beacf178154cc3" +
+                "a1178cb73443deaa53aa0840324708bb");
+        expected.put("SHA3-512",
+                "04337f667a250348a1acb992863b3ddc"+
+                "eab38365c206c18d356d2b31675ad669"+
+                "5fb5497f4e79b11640aefbb8042a5dbb"+
+                "7ec6c2c6c1b6e19210453591c52cb6eb");
+        expected.put("SHA1", "PIPQAHIWHLEQ3DVFJQCQ7L22HADZPCFG");
         //test comma separated
-        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse("md5,sha256,sha384,sha512");
         Metadata m = new Metadata();
         XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(UNLIMITED, algos)), m);
-        for (CommonsDigester.DigestAlgorithm algo : new CommonsDigester.DigestAlgorithm[]{
-                CommonsDigester.DigestAlgorithm.MD5,
-                CommonsDigester.DigestAlgorithm.SHA256,
-                CommonsDigester.DigestAlgorithm.SHA384,
-                CommonsDigester.DigestAlgorithm.SHA512}) {
-            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
+                new DigestingParser(p, new BouncyCastleDigester(UNLIMITED,
+                        "MD5,SHA256,SHA384,SHA512,SHA3-512,SHA1:32")), m);
+        for (String algo : new String[]{
+                "MD5", "SHA256", "SHA384", "SHA512", "SHA3-512",
+                "SHA1"
+        }) {
+            assertEquals(algo, expected.get(algo), m.get(P + algo));
         }
 
-        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.MD2.toString()));
-        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.SHA1.toString()));
+        assertNull(m.get(P+"MD2"));
 
     }
 
@@ -104,21 +127,23 @@ public class DigestingParserTest extends TikaTest {
         String expectedMD5 = "59f626e09a8c16ab6dbc2800c685f772";
         Metadata m = new Metadata();
         XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(100, CommonsDigester.DigestAlgorithm.MD5)), m);
+                new DigestingParser(p, new BouncyCastleDigester(100, "MD5")), m);
         assertEquals(expectedMD5, m.get(P+"MD5"));
     }
 
-    @Test
+    @Test(expected = IllegalArgumentException.class)
     public void testNegativeMaxMarkLength() throws Exception {
-        Metadata m = new Metadata();
-        boolean ex = false;
-        try {
-            XMLResult xml = getXML("test_recursive_embedded.docx",
-                    new DigestingParser(p, new CommonsDigester(-1, CommonsDigester.DigestAlgorithm.MD5)), m);
-        } catch (IllegalArgumentException e) {
-            ex = true;
-        }
-        assertTrue("Exception not thrown", ex);
+        getXML("test_recursive_embedded.docx",
+                    new DigestingParser(p,
+                            new BouncyCastleDigester(-1, "MD5")));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testUnrecognizedEncodingOptions() throws Exception {
+        getXML("test_recursive_embedded.docx",
+                new DigestingParser(p,
+                        new BouncyCastleDigester(100000,
+                                "MD5:33")));
     }
 
     @Test
@@ -169,85 +194,74 @@ public class DigestingParserTest extends TikaTest {
         os.close();
 
         Metadata truth = new Metadata();
-        addTruth(tmp, CommonsDigester.DigestAlgorithm.MD5, truth);
-        addTruth(tmp, CommonsDigester.DigestAlgorithm.SHA1, truth);
-        addTruth(tmp, CommonsDigester.DigestAlgorithm.SHA512, truth);
+        addTruth(tmp, "MD5", truth);
+        addTruth(tmp, "SHA1", truth);
+        addTruth(tmp, "SHA512", truth);
 
 
         checkMulti(truth, tmp, fileLength, markLimit, useTikaInputStream,
-                CommonsDigester.DigestAlgorithm.SHA512,
-                CommonsDigester.DigestAlgorithm.SHA1,
-                CommonsDigester.DigestAlgorithm.MD5);
-
+                "SHA512",
+                "SHA1", "MD5");
         checkMulti(truth, tmp, fileLength, markLimit, useTikaInputStream,
-                CommonsDigester.DigestAlgorithm.MD5,
-                CommonsDigester.DigestAlgorithm.SHA1);
+                "MD5", "SHA1");
 
         checkMulti(truth, tmp, fileLength, markLimit, useTikaInputStream,
-                CommonsDigester.DigestAlgorithm.SHA1,
-                CommonsDigester.DigestAlgorithm.SHA512,
-                CommonsDigester.DigestAlgorithm.MD5);
-
+                "SHA1", "SHA512", "MD5");
         checkMulti(truth, tmp, fileLength, markLimit, useTikaInputStream,
-                CommonsDigester.DigestAlgorithm.SHA1);
+                "SHA1");
 
         checkMulti(truth, tmp, fileLength, markLimit, useTikaInputStream,
-                CommonsDigester.DigestAlgorithm.MD5);
+                "MD5");
 
     }
 
     private void checkMulti(Metadata truth, Path tmp,
                             int fileLength, int markLimit,
-                            boolean useTikaInputStream, CommonsDigester.DigestAlgorithm... algos) throws IOException {
+                            boolean useTikaInputStream,
+                            String... algos) throws IOException {
         Metadata result = new Metadata();
-        CommonsDigester digester = new CommonsDigester(markLimit, algos);
+        BouncyCastleDigester digester = new BouncyCastleDigester(markLimit,
+                StringUtils.join(algos, ","));
         try (InputStream is = useTikaInputStream ? TikaInputStream.get(tmp) :
                 new BufferedInputStream(Files.newInputStream(tmp))) {
             digester.digest(is, result, new ParseContext());
         }
 
-        for (CommonsDigester.DigestAlgorithm algo : algos) {
-            String truthValue = truth.get(P+algo.name());
-            String resultValue = result.get(P+algo.name());
+        for (String algo : algos) {
+            String truthValue = truth.get(P+algo);
+            String resultValue = result.get(P+algo);
             assertNotNull("truth", truthValue);
             assertNotNull("result (fileLength="+fileLength+", markLimit="+markLimit+")",
                     resultValue);
-
             assertEquals("fileLength("+fileLength+") markLimit("+
-                    markLimit+") useTikaInputStream("+useTikaInputStream+")"+
-                    "algorithm("+algo.name()+") seed("+SEED+")",
+                    markLimit+") useTikaInputStream("+useTikaInputStream+") "+
+                    "algorithm("+algo+") seed("+SEED+")",
                     truthValue, resultValue);
         }
 
     }
 
-    private void addTruth(Path tmp, CommonsDigester.DigestAlgorithm algo, Metadata truth) throws IOException {
+    private void addTruth(Path tmp, String algo, Metadata truth) throws IOException {
         String digest = null;
+        //for now, rely on commons-codec's DigestUtils for truth
         try (InputStream is = Files.newInputStream(tmp)) {
-            switch (algo) {
-                case MD2:
-                    digest = DigestUtils.md2Hex(is);
-                    break;
-                case MD5:
-                    digest = DigestUtils.md5Hex(is);
-                    break;
-                case SHA1:
-                    digest = DigestUtils.sha1Hex(is);
-                    break;
-                case SHA256:
-                    digest = DigestUtils.sha256Hex(is);
-                    break;
-                case SHA384:
-                    digest = DigestUtils.sha384Hex(is);
-                    break;
-                case SHA512:
-                    digest = DigestUtils.sha512Hex(is);
-                    break;
-                default:
-                    throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algo.toString());
+            if ("MD2".equals(algo)) {
+                digest = DigestUtils.md2Hex(is);
+            } else if ("MD5".equals(algo)) {
+                digest = DigestUtils.md5Hex(is);
+            } else if ("SHA1".equals(algo)) {
+                digest = DigestUtils.sha1Hex(is);
+            } else if ("SHA256".equals(algo)) {
+                digest = DigestUtils.sha256Hex(is);
+            } else if ("SHA384".equals(algo)) {
+                digest = DigestUtils.sha384Hex(is);
+            } else if ("SHA512".equals(algo)) {
+                digest = DigestUtils.sha512Hex(is);
+            } else {
+                throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algo);
             }
         }
-        truth.set(P+algo.name(), digest);
+        truth.set(P+algo, digest);
 
     }
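
For reference, a minimal standalone sketch of the string-based constructor the
tests above exercise (not taken from the patch; the file path is a placeholder,
and, per the encoding suffix used elsewhere in this change, ":32" selects base32
output while the default stays hex):

    import java.io.InputStream;
    import java.nio.file.Paths;

    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.utils.BouncyCastleDigester;

    public class BouncyCastleDigesterSketch {
        public static void main(String[] args) throws Exception {
            // 100000 = max bytes to mark on the stream (the "digestMarkLimit"),
            // "SHA1:32" = base32-encoded SHA1, "MD5" = hex-encoded MD5
            BouncyCastleDigester digester =
                    new BouncyCastleDigester(100000, "MD5,SHA1:32");
            Metadata metadata = new Metadata();
            try (InputStream is = TikaInputStream.get(Paths.get("example.docx"))) {
                digester.digest(is, metadata, new ParseContext());
            }
            // digest values are written to the metadata; the key names below
            // follow the X-TIKA:digest:* pattern asserted in TikaResourceTest
            System.out.println(metadata.get("X-TIKA:digest:MD5"));
            System.out.println(metadata.get("X-TIKA:digest:SHA1"));
        }
    }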
 
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
index 8b198a3..931718e 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
@@ -59,18 +59,18 @@ public class DigestingParserTest extends TikaTest {
         Map<CommonsDigester.DigestAlgorithm, String> expected =
                 new HashMap<>();
 
-        expected.put(CommonsDigester.DigestAlgorithm.MD2,"d768c8e27b0b52c6eaabfaa7122d1d4f");
-        expected.put(CommonsDigester.DigestAlgorithm.MD5,"59f626e09a8c16ab6dbc2800c685f772");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA1,"7a1f001d163ac90d8ea54c050faf5a38079788a6");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA256,"c4b7fab030a8b6a9d6691f6699ac8e6f" +
-                                                            "82bc53764a0f1430d134ae3b70c32654");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA384,"ebe368b9326fef44408290724d187553"+
-                                                            "8b8a6923fdf251ddab72c6e4b5d54160" +
-                                                            "9db917ba4260d1767995a844d8d654df");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA512,"ee46d973ee1852c018580c242955974d"+
-                                                            "da4c21f36b54d7acd06fcf68e974663b"+
-                                                            "fed1d256875be58d22beacf178154cc3"+
-                                                            "a1178cb73443deaa53aa0840324708bb");
+        expected.put(CommonsDigester.DigestAlgorithm.MD2, "d768c8e27b0b52c6eaabfaa7122d1d4f");
+        expected.put(CommonsDigester.DigestAlgorithm.MD5, "59f626e09a8c16ab6dbc2800c685f772");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA1, "7a1f001d163ac90d8ea54c050faf5a38079788a6");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA256, "c4b7fab030a8b6a9d6691f6699ac8e6f" +
+                "82bc53764a0f1430d134ae3b70c32654");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA384, "ebe368b9326fef44408290724d187553" +
+                "8b8a6923fdf251ddab72c6e4b5d54160" +
+                "9db917ba4260d1767995a844d8d654df");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA512, "ee46d973ee1852c018580c242955974d" +
+                "da4c21f36b54d7acd06fcf68e974663b" +
+                "fed1d256875be58d22beacf178154cc3" +
+                "a1178cb73443deaa53aa0840324708bb");
 
         //test each one
         for (CommonsDigester.DigestAlgorithm algo : CommonsDigester.DigestAlgorithm.values()) {
@@ -80,14 +80,35 @@ public class DigestingParserTest extends TikaTest {
             assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
         }
 
+    }
+
+    @Test
+    public void testCommaSeparated() throws Exception {
+        Map<CommonsDigester.DigestAlgorithm, String> expected =
+                new HashMap<>();
+
+
+        expected.put(CommonsDigester.DigestAlgorithm.MD2, "d768c8e27b0b52c6eaabfaa7122d1d4f");
+        expected.put(CommonsDigester.DigestAlgorithm.MD5, "59f626e09a8c16ab6dbc2800c685f772");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA1, "PIPQAHIWHLEQ3DVFJQCQ7L22HADZPCFG");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA256, "c4b7fab030a8b6a9d6691f6699ac8e6f" +
+                "82bc53764a0f1430d134ae3b70c32654");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA384, "ebe368b9326fef44408290724d187553" +
+                "8b8a6923fdf251ddab72c6e4b5d54160" +
+                "9db917ba4260d1767995a844d8d654df");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA512, "ee46d973ee1852c018580c242955974d" +
+                "da4c21f36b54d7acd06fcf68e974663b" +
+                "fed1d256875be58d22beacf178154cc3" +
+                "a1178cb73443deaa53aa0840324708bb");
 
         //test comma separated
-        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse("md5,sha256,sha384,sha512");
         Metadata m = new Metadata();
         XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(UNLIMITED, algos)), m);
+                new DigestingParser(p, new CommonsDigester(UNLIMITED,
+                        "md5,sha256,sha384,sha512,sha1:32")), m);
         for (CommonsDigester.DigestAlgorithm algo : new CommonsDigester.DigestAlgorithm[]{
                 CommonsDigester.DigestAlgorithm.MD5,
+                CommonsDigester.DigestAlgorithm.SHA1,
                 CommonsDigester.DigestAlgorithm.SHA256,
                 CommonsDigester.DigestAlgorithm.SHA384,
                 CommonsDigester.DigestAlgorithm.SHA512}) {
@@ -95,8 +116,6 @@ public class DigestingParserTest extends TikaTest {
         }
 
         assertNull(m.get(P+CommonsDigester.DigestAlgorithm.MD2.toString()));
-        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.SHA1.toString()));
-
     }
 
     @Test
@@ -212,9 +231,8 @@ public class DigestingParserTest extends TikaTest {
             assertNotNull("truth", truthValue);
             assertNotNull("result (fileLength="+fileLength+", markLimit="+markLimit+")",
                     resultValue);
-
             assertEquals("fileLength("+fileLength+") markLimit("+
-                    markLimit+") useTikaInputStream("+useTikaInputStream+")"+
+                    markLimit+") useTikaInputStream("+useTikaInputStream+") "+
                     "algorithm("+algo.name()+") seed("+SEED+")",
                     truthValue, resultValue);
         }
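
The same comma-separated spec can be handed to a full parse; a hedged sketch,
with AutoDetectParser and the 1000000-byte mark limit chosen only for
illustration:

    import java.io.InputStream;
    import java.nio.file.Paths;

    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.DigestingParser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.Parser;
    import org.apache.tika.parser.utils.CommonsDigester;
    import org.apache.tika.sax.BodyContentHandler;

    public class DigestingParseSketch {
        public static void main(String[] args) throws Exception {
            // hex digests for md5/sha256/sha384/sha512, base32 for sha1 (":32")
            Parser parser = new DigestingParser(new AutoDetectParser(),
                    new CommonsDigester(1000000, "md5,sha256,sha384,sha512,sha1:32"));
            Metadata metadata = new Metadata();
            try (InputStream is = TikaInputStream.get(Paths.get("example.docx"))) {
                parser.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
            }
            // print whatever digest keys were added alongside the parsed metadata
            for (String name : metadata.names()) {
                if (name.startsWith("X-TIKA:digest:")) {
                    System.out.println(name + "=" + metadata.get(name));
                }
            }
        }
    }
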
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
index b5d94d9..03d582e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
@@ -37,6 +37,7 @@ import org.apache.cxf.rs.security.cors.CrossOriginResourceSharingFilter;
 import org.apache.tika.Tika;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.utils.BouncyCastleDigester;
 import org.apache.tika.parser.utils.CommonsDigester;
 import org.apache.tika.server.resource.DetectorResource;
 import org.apache.tika.server.resource.LanguageResource;
@@ -80,7 +81,7 @@ public class TikaServerCli {
         options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ", use * for all)");
         options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')');
         options.addOption("c", "config", true, "Tika Configuration file to override default config with.");
-        options.addOption("d", "digest", true, "include digest in metadata, e.g. md5,sha256");
+        options.addOption("d", "digest", true, "include digest in metadata, e.g. md5,sha1:32,sha256");
         options.addOption("dml", "digestMarkLimit", true, "max number of bytes to mark on stream for digest");
         options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
         options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
@@ -168,8 +169,16 @@ public class TikaServerCli {
                         throw new RuntimeException("Must have parseable int after digestMarkLimit(dml): "+dmlS);
                     }
                 }
-                digester = new CommonsDigester(digestMarkLimit,
-                        CommonsDigester.parse(line.getOptionValue("digest")));
+                try {
+                    digester = new CommonsDigester(digestMarkLimit, line.getOptionValue("digest"));
+                } catch (IllegalArgumentException commonsException) {
+                    try {
+                        digester = new BouncyCastleDigester(digestMarkLimit, line.getOptionValue("digest"));
+                    } catch (IllegalArgumentException bcException) {
+                        throw new IllegalArgumentException("Tried both CommonsDigester ("+commonsException.getMessage()+
+                                ") and BouncyCastleDigester ("+bcException.getMessage()+")", bcException);
+                    }
+                }
             }
 
             if (line.hasOption("enableFileUrl") &&
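
Callers outside the server can reuse the same fall-through, trying the Commons
Codec implementation first and Bouncy Castle only if the spec is rejected; a
minimal sketch, with the class and method names chosen only for illustration:

    import org.apache.tika.parser.DigestingParser;
    import org.apache.tika.parser.utils.BouncyCastleDigester;
    import org.apache.tika.parser.utils.CommonsDigester;

    public class DigesterFallbackSketch {
        // digestSpec uses the same comma-separated "algorithm[:encoding]" form
        // shown above, e.g. "md5,sha1:32,sha256"
        static DigestingParser.Digester buildDigester(int markLimit, String digestSpec) {
            try {
                return new CommonsDigester(markLimit, digestSpec);
            } catch (IllegalArgumentException commonsException) {
                try {
                    return new BouncyCastleDigester(markLimit, digestSpec);
                } catch (IllegalArgumentException bcException) {
                    throw new IllegalArgumentException("Tried both CommonsDigester ("
                            + commonsException.getMessage() + ") and BouncyCastleDigester ("
                            + bcException.getMessage() + ")", bcException);
                }
            }
        }
    }
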
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index 2a09968..7b35fec 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -83,7 +83,7 @@ public abstract class CXFTestBase {
     public void setUp() {
         this.tika = TikaConfig.getDefaultConfig();
         TikaResource.init(tika,
-                new CommonsDigester(DIGESTER_READ_LIMIT, CommonsDigester.DigestAlgorithm.MD5),
+                new CommonsDigester(DIGESTER_READ_LIMIT, "md5,sha1:32"),
                 new DefaultInputStreamFactory());
         JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
         setUpResources(sf);
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
index 50d5356..5d112ff 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
@@ -138,6 +138,8 @@ public class TikaResourceTest extends CXFTestBase {
         assertTrue(responseMsg.contains("test"));
         assertContains("<meta name=\"X-TIKA:digest:MD5\" content=\"f8be45c34e8919eedba48cc8d207fbf0\"/>",
                 responseMsg);
+        assertContains("<meta name=\"X-TIKA:digest:SHA1\" content=\"N4EBCE7EGTIGZWETEJ6WD3W4KN32TLPG\"/>",
+                responseMsg);
     }
 
     @Test

-- 
To stop receiving notification emails like this one, please contact
['"commits@tika.apache.org" <co...@tika.apache.org>'].