You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ti...@apache.org on 2022/08/14 10:19:27 UTC
[tika] branch main updated: TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML
This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new a6badc88a TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML
a6badc88a is described below
commit a6badc88a50f0794711075f3dbe6e9e8bd6512fc
Author: Tilman Hausherr <ti...@apache.org>
AuthorDate: Sun Aug 14 12:16:42 2022 +0200
TIKA-3819: revert last two commits of this issue because this debug output is already available when requesting HTML
---
.../java/org/apache/tika/parser/pdf/PDFParser.java | 39 ----------------------
1 file changed, 39 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 4f2f8a6f0..e790378ae 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -16,15 +16,10 @@
*/
package org.apache.tika.parser.pdf;
-import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.nio.file.Files;
import java.nio.file.Path;
-import java.security.DigestInputStream;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
@@ -48,8 +43,6 @@ import org.apache.pdfbox.pdmodel.fixup.AbstractFixup;
import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -114,8 +107,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
*/
public class PDFParser extends AbstractParser implements RenderingParser, Initializable {
- protected static final Logger LOG = LoggerFactory.getLogger(PDFParser.class);
-
/**
* Metadata key for giving the document password to the parser.
*
@@ -165,10 +156,6 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
} else {
tstream = TikaInputStream.cast(stream);
}
- if (LOG.isDebugEnabled() && tstream != null) {
- LOG.debug("File: " + tstream.getPath() + ", length: " + tstream.getLength() +
- ", md5: " + calcMD5(tstream.getPath()));
- }
password = getPassword(metadata, context);
MemoryUsageSetting memoryUsageSetting = MemoryUsageSetting.setupMainMemoryOnly();
if (localConfig.getMaxMainMemoryBytes() >= 0) {
@@ -775,32 +762,6 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
defaultConfig.setImageStrategy(imageStrategy);
}
- private String calcMD5(Path path) throws IOException {
- MessageDigest md;
- try {
- md = MessageDigest.getInstance("MD5");
- }
- catch (NoSuchAlgorithmException ex) {
- return "No MD5";
- }
-
- try (InputStream is = new BufferedInputStream(Files.newInputStream(path));
- DigestInputStream dis = new DigestInputStream(is, md)) {
- while (dis.read() >= 0)
- ;
- }
- byte[] digest = md.digest();
- StringBuilder hexString = new StringBuilder();
- for (byte by : digest) {
- int ih = 0xFF & by;
- if (ih < 16) {
- hexString.append('0');
- }
- hexString.append(Integer.toHexString(ih));
- }
- return hexString.toString();
- }
-
/**
* Copied from AcroformDefaultFixup minus generation of appearances and handling of orphan
* widgets, which we don't need.