You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ti...@apache.org on 2022/08/14 10:19:27 UTC

[tika] branch main updated: TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML

This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a6badc88a TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML
a6badc88a is described below

commit a6badc88a50f0794711075f3dbe6e9e8bd6512fc
Author: Tilman Hausherr <ti...@apache.org>
AuthorDate: Sun Aug 14 12:16:42 2022 +0200

    TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML
---
 .../java/org/apache/tika/parser/pdf/PDFParser.java | 39 ----------------------
 1 file changed, 39 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 4f2f8a6f0..e790378ae 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -16,15 +16,10 @@
  */
 package org.apache.tika.parser.pdf;
 
-import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.file.Files;
 import java.nio.file.Path;
-import java.security.DigestInputStream;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collections;
@@ -48,8 +43,6 @@ import org.apache.pdfbox.pdmodel.fixup.AbstractFixup;
 import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
 import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor;
 import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -114,8 +107,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
  */
 public class PDFParser extends AbstractParser implements RenderingParser, Initializable {
 
-    protected static final Logger LOG = LoggerFactory.getLogger(PDFParser.class);
-
     /**
      * Metadata key for giving the document password to the parser.
      *
@@ -165,10 +156,6 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
             } else {
                 tstream = TikaInputStream.cast(stream);
             }
-            if (LOG.isDebugEnabled() && tstream != null) {
-                LOG.debug("File: " + tstream.getPath() + ", length: " + tstream.getLength() + 
-                        ", md5: " + calcMD5(tstream.getPath()));
-            }
             password = getPassword(metadata, context);
             MemoryUsageSetting memoryUsageSetting = MemoryUsageSetting.setupMainMemoryOnly();
             if (localConfig.getMaxMainMemoryBytes() >= 0) {
@@ -775,32 +762,6 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
         defaultConfig.setImageStrategy(imageStrategy);
     }
 
-    private String calcMD5(Path path) throws IOException {
-        MessageDigest md;
-        try {
-            md = MessageDigest.getInstance("MD5");
-        }
-        catch (NoSuchAlgorithmException ex) {
-            return "No MD5";
-        }
-
-        try (InputStream is = new BufferedInputStream(Files.newInputStream(path));
-                DigestInputStream dis = new DigestInputStream(is, md)) {
-            while (dis.read() >= 0)
-                ;
-        }
-        byte[] digest = md.digest();
-        StringBuilder hexString = new StringBuilder();
-        for (byte by : digest) {
-            int ih = 0xFF & by;
-            if (ih < 16) {
-                hexString.append('0');
-            }
-            hexString.append(Integer.toHexString(ih));
-        }
-        return hexString.toString();
-    }
-
     /**
      * Copied from AcroformDefaultFixup minus generation of appearances and handling of orphan
      * widgets, which we don't need.