You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ti...@apache.org on 2023/08/13 14:12:43 UTC
[tika] 01/02: TIKA-4114: add comment; add dummy throw TIKA-4064: avoid deprecated method
This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4f1e8f25efb866d8576b0ee29380a8a68a88fdda
Author: Tilman Hausherr <ti...@apache.org>
AuthorDate: Sun Aug 13 16:10:36 2023 +0200
TIKA-4114: add comment; add dummy throw
TIKA-4064: avoid deprecated method
---
.../java/org/apache/tika/parser/pdf/PDFParser.java | 23 ++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 15d0718d1..0be92429a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -187,6 +187,7 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
memoryUsageSetting = MemoryUsageSetting.setupMainMemoryOnly();
}
+ //TODO PDFBOX30 replace "memoryUsageSetting" with "memoryUsageSetting.streamCache"
pdfDocument = getPDDocument(stream, tstream, password, memoryUsageSetting, metadata,
context);
@@ -259,11 +260,12 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
//Do we want to also check that this is a portfolio PDF/contains a "collection"?
for (COSObject obj : fileSpecs) {
if (obj.getObject() instanceof COSDictionary) {
- COSBase relationship = obj.getDictionaryObject(AF_RELATIONSHIP);
+ COSDictionary dict = (COSDictionary) obj.getObject();
+ COSBase relationship = dict.getDictionaryObject(AF_RELATIONSHIP);
if (relationship != null && relationship.equals(ENCRYPTED_PAYLOAD)) {
String name = "";
- COSBase uf = obj.getDictionaryObject(COSName.UF);
- COSBase f = obj.getDictionaryObject(COSName.F);
+ COSBase uf = dict.getDictionaryObject(COSName.UF);
+ COSBase f = dict.getDictionaryObject(COSName.F);
if (uf != null && uf instanceof COSString) {
name = ((COSString)uf).getString();
} else if (f != null && f instanceof COSString) {
@@ -294,6 +296,8 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
List<StartXRefOffset> xRefOffsets = new ArrayList<>();
//TODO -- can we use the PDFBox parser's RandomAccessRead
//so that we don't have to reopen from file?
+ //TODO PDFBOX30 replace RandomAccessBufferedFileInputStream
+ // with RandomAccessReadBufferedFile
try (RandomAccessRead ra =
new RandomAccessBufferedFileInputStream(tikaInputStream.getFile())) {
StartXRefScanner xRefScanner = new StartXRefScanner(ra);
@@ -381,6 +385,10 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
PDMetadataExtractor.addNotNull(TikaCoreProperties.SIGNATURE_REASON,
signature.getReason(), metadata);
hasSignature = true;
+ //TODO PDFBOX30 remove this segment and the exception handling after migration
+ if (false != false) {
+ throw new IOException();
+ }
}
} catch (IOException e) {
//swallow
@@ -452,6 +460,8 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
tstream, metadata, parseContext, PageRangeRequest.RENDER_ALL);
}
+ //TODO PDFBOX30 replace "MemoryUsageSetting memoryUsageSetting" with
+ // "StreamCacheCreateFunction streamCacheCreateFunction"
protected PDDocument getPDDocument(InputStream stream, TikaInputStream tstream, String password,
MemoryUsageSetting memoryUsageSetting, Metadata metadata,
ParseContext context)
@@ -480,12 +490,16 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
}
}
+ //TODO PDFBOX30 replace "MemoryUsageSetting memoryUsageSetting" with
+ // "StreamCacheCreateFunction streamCacheCreateFunction"
protected PDDocument getPDDocument(InputStream inputStream, String password,
MemoryUsageSetting memoryUsageSetting, Metadata metadata,
ParseContext parseContext) throws IOException {
return PDDocument.load(inputStream, password, memoryUsageSetting);
}
+ //TODO PDFBOX30 replace "MemoryUsageSetting memoryUsageSetting" with
+ // "StreamCacheCreateFunction streamCacheCreateFunction"
protected PDDocument getPDDocument(Path path, String password,
MemoryUsageSetting memoryUsageSetting, Metadata metadata,
ParseContext parseContext) throws IOException {
@@ -573,7 +587,8 @@ public class PDFParser extends AbstractParser implements RenderingParser, Initia
metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS,
Boolean.toString(ap.canModifyAnnotations()));
metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
- metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));
+ //TODO PDFBOX30 replace "CAN_PRINT_DEGRADED" with "CAN_PRINT_FAITHFUL"
+ metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintFaithful()));
metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(document.isEncrypted()));
if (document.getDocumentCatalog().getLanguage() != null) {