You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/11/10 15:15:23 UTC

svn commit: r1637868 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pdf/PDFParser.java test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Author: tallison
Date: Mon Nov 10 14:15:22 2014
New Revision: 1637868

URL: http://svn.apache.org/r1637868
Log:
TIKA-1467: in PDFParser, set the "pdf:encrypted" metadata value (from isEncrypted()) before the decryption step instead of after it.

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1637868&r1=1637867&r2=1637868&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Mon Nov 10 14:15:22 2014
@@ -127,8 +127,9 @@ public class PDFParser extends AbstractP
                     pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
                 }
             }
-            
-           
+
+            metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));
+
             if (pdfDocument.isEncrypted()) {
                 String password = null;
                 
@@ -225,7 +226,6 @@ public class PDFParser extends AbstractP
         	addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
             }
         }
-        metadata.set("pdf:encrypted", Boolean.toString(document.isEncrypted()));
 
         //try to get the various versions
         //Caveats:

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1637868&r1=1637867&r2=1637868&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Mon Nov 10 14:15:22 2014
@@ -160,6 +160,7 @@ public class PDFParserTest extends TikaT
            stream.close();
        }
 
+       assertEquals("true", metadata.get("pdf:encrypted"));
        assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("The Bank of England", metadata.get(Metadata.AUTHOR));
@@ -191,6 +192,7 @@ public class PDFParserTest extends TikaT
        } finally {
           stream.close();
        }
+       assertEquals("true", metadata.get("pdf:encrypted"));
 
        assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
@@ -569,6 +571,8 @@ public class PDFParserTest extends TikaT
         knownMetadataDiffs.add("testAnnotations.pdf");
         // Added for TIKA-93.
         knownMetadataDiffs.add("testOCR.pdf");
+        //PDFBox-2490/TIKA-1467; should be ok with version >= PDFBox 1.8.8
+        knownMetadataDiffs.add("testPDF_protected.pdf");
 
         //empty for now
         Set<String> knownContentDiffs = new HashSet<String>();