You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/05/03 17:01:56 UTC
[tika] branch main updated (c01c048a9 -> b116d71bf)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
from c01c048a9 TIKA-4025 -- extract Java's ImageReader's num images into the metadata. (#1108)
add b116d71bf TIKA-4017 -- extract incremental updates (#1085)
No new revisions were added by this update.
Summary of changes:
CHANGES.txt | 3 +
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 8 +-
.../java/org/apache/tika/metadata/Metadata.java | 24 ++
.../main/java/org/apache/tika/metadata/PDF.java | 19 ++
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 131 ++++++++---
.../java/org/apache/tika/parser/pdf/PDFParser.java | 100 ++++++++
.../apache/tika/parser/pdf/PDFParserConfig.java | 36 +++
.../pdf/updates/IncrementalUpdateRecord.java | 27 +--
.../parser/pdf/updates/IsIncrementalUpdate.java | 6 +-
.../tika/parser/pdf/updates/StartXRefOffset.java | 68 ++++++
.../tika/parser/pdf/updates/StartXRefScanner.java | 252 +++++++++++++++++++++
.../tika/parser/pdf/PDFIncrementalUpdatesTest.java | 172 ++++++++++++++
.../src/test/resources/log4j2.xml | 2 +-
.../test-documents/testPDF_incrementalUpdates.pdf | Bin 0 -> 64872 bytes
14 files changed, 789 insertions(+), 59 deletions(-)
copy tika-batch/src/main/java/org/apache/tika/batch/fs/FSConsumersManager.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IncrementalUpdateRecord.java (65%)
copy tika-core/src/main/java/org/apache/tika/language/detect/LanguageConfidence.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IsIncrementalUpdate.java (82%)
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/StartXRefOffset.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/StartXRefScanner.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFIncrementalUpdatesTest.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_incrementalUpdates.pdf