You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/11/10 15:15:23 UTC
svn commit: r1637868 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/pdf/PDFParser.java
test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Author: tallison
Date: Mon Nov 10 14:15:22 2014
New Revision: 1637868
URL: http://svn.apache.org/r1637868
Log:
TIKA-1467: in PDFParser, set the "pdf:encrypted" metadata (from isEncrypted()) before the decryption step rather than after it.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1637868&r1=1637867&r2=1637868&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Mon Nov 10 14:15:22 2014
@@ -127,8 +127,9 @@ public class PDFParser extends AbstractP
pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
}
}
-
-
+
+ metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));
+
if (pdfDocument.isEncrypted()) {
String password = null;
@@ -225,7 +226,6 @@ public class PDFParser extends AbstractP
addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
}
}
- metadata.set("pdf:encrypted", Boolean.toString(document.isEncrypted()));
//try to get the various versions
//Caveats:
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1637868&r1=1637867&r2=1637868&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Mon Nov 10 14:15:22 2014
@@ -160,6 +160,7 @@ public class PDFParserTest extends TikaT
stream.close();
}
+ assertEquals("true", metadata.get("pdf:encrypted"));
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("The Bank of England", metadata.get(Metadata.AUTHOR));
@@ -191,6 +192,7 @@ public class PDFParserTest extends TikaT
} finally {
stream.close();
}
+ assertEquals("true", metadata.get("pdf:encrypted"));
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
@@ -569,6 +571,8 @@ public class PDFParserTest extends TikaT
knownMetadataDiffs.add("testAnnotations.pdf");
// Added for TIKA-93.
knownMetadataDiffs.add("testOCR.pdf");
+ //PDFBox-2490/TIKA-1467; should be ok with version >= PDFBox 1.8.8
+ knownMetadataDiffs.add("testPDF_protected.pdf");
//empty for now
Set<String> knownContentDiffs = new HashSet<String>();