You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/05/03 16:14:15 UTC
[tika] branch TIKA-4017 updated (4b27622d1 -> b19502214)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4017
in repository https://gitbox.apache.org/repos/asf/tika.git
from 4b27622d1 TIKA-4017 -- add more unit tests; don't count startxref if number component is at a literal EOF.
add f4195789f TIKA-4018 -- improve extraction of metadata from warc (#1084)
add f314d7172 Bump aws.version from 1.12.449 to 1.12.450
add 7448a1091 Merge pull request #1086 from apache/dependabot/maven/aws.version-1.12.450
add 3decc2d8f Bump azure-storage-blob from 12.21.1 to 12.22.0
add c20784593 Merge pull request #1087 from apache/dependabot/maven/com.azure-azure-storage-blob-12.22.0
add c53dbe041 Bump aws.version from 1.12.450 to 1.12.451
add 4dcb51e28 Merge pull request #1088 from apache/dependabot/maven/aws.version-1.12.451
add 43544e420 Bump maven-project-info-reports-plugin from 3.4.2 to 3.4.3
add 2e2ae0a26 Merge pull request #1089 from apache/dependabot/maven/org.apache.maven.plugins-maven-project-info-reports-plugin-3.4.3
add 9574b561c Bump woodstox-core from 6.5.0 to 6.5.1
add 1b60c0a75 Merge pull request #1090 from apache/dependabot/maven/com.fasterxml.woodstox-woodstox-core-6.5.1
add 57a539da3 Bump aws.version from 1.12.451 to 1.12.452
add 32a45b9d9 Merge pull request #1091 from apache/dependabot/maven/aws.version-1.12.452
add ad99ac298 Bump aws.version from 1.12.452 to 1.12.453
add c9629adbb Merge pull request #1093 from apache/dependabot/maven/aws.version-1.12.453
add c8a7c1a1c Bump maven-checkstyle-plugin from 3.2.1 to 3.2.2
add edd6ccccc Merge pull request #1092 from apache/dependabot/maven/org.apache.maven.plugins-maven-checkstyle-plugin-3.2.2
add 0c91c1120 Bump aws.version from 1.12.453 to 1.12.454
add 3710e4740 Merge pull request #1094 from apache/dependabot/maven/aws.version-1.12.454
add 663af05df Bump zstd-jni from 1.5.5-1 to 1.5.5-2
add 982a70de2 Merge pull request #1096 from apache/dependabot/maven/com.github.luben-zstd-jni-1.5.5-2
add 36db4e47f Bump jackson.version from 2.14.2 to 2.15.0
add 7b19ebea3 Merge pull request #1095 from apache/dependabot/maven/jackson.version-2.15.0
add 0447a0f6c Bump aws.version from 1.12.454 to 1.12.455
add 089ad7da7 Merge pull request #1097 from apache/dependabot/maven/aws.version-1.12.455
add 251f15d22 Bump aws.version from 1.12.455 to 1.12.456
add f134e072e Merge pull request #1098 from apache/dependabot/maven/aws.version-1.12.456
add 41118ed61 Bump netty-bom from 4.1.91.Final to 4.1.92.Final
add 3f96028da Merge pull request #1099 from apache/dependabot/maven/io.netty-netty-bom-4.1.92.Final
add 3a2194474 Bump google-cloud-storage from 2.22.0 to 2.22.1
add d387b61db Merge pull request #1101 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.22.1
add ba1ab4d4a Bump aws.version from 1.12.456 to 1.12.457
add f0c8bd363 Merge pull request #1100 from apache/dependabot/maven/aws.version-1.12.457
add 9dc6a02e9 Bump aws.version from 1.12.457 to 1.12.458
add 5924ae03b Merge pull request #1103 from apache/dependabot/maven/aws.version-1.12.458
add 4298b4239 Bump aws.version from 1.12.458 to 1.12.459
add 38a71a464 Merge pull request #1104 from apache/dependabot/maven/aws.version-1.12.459
add cffd6060e Bump junit5.version from 5.9.2 to 5.9.3
add e5713742f Merge pull request #1102 from apache/dependabot/maven/junit5.version-5.9.3
add 9b10b7b7f Bump aws.version from 1.12.459 to 1.12.460
add 8f2e7e1ac Merge pull request #1105 from apache/dependabot/maven/aws.version-1.12.460
add 78d0fa4c6 Bump aws.version from 1.12.460 to 1.12.461
add 15f1c3434 Merge pull request #1106 from apache/dependabot/maven/aws.version-1.12.461
add 17dc9ad12 TIKA-4022 (#1107)
add c01c048a9 TIKA-4025 -- extract Java's ImageReader's num images into the metadata. (#1108)
add 4686032b6 Merge remote-tracking branch 'origin/main' into TIKA-4017
add b19502214 Add parsing of incremental updates.
No new revisions were added by this update.
Summary of changes:
CHANGES.txt | 12 +-
tika-app/pom.xml | 2 +-
.../services/org.apache.tika.parser.Parser | 0
tika-core/pom.xml | 2 +-
.../main/java/org/apache/tika/metadata/PDF.java | 2 +
.../apache/tika/metadata/TikaCoreProperties.java | 6 +
tika-parent/pom.xml | 18 +--
.../org/apache/tika/parser/image/ImageParser.java | 9 +-
.../apache/tika/parser/image/ImageParserTest.java | 5 +
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 131 +++++++++++++++------
.../java/org/apache/tika/parser/pdf/PDFParser.java | 51 ++++++--
.../apache/tika/parser/pdf/PDFParserConfig.java | 5 +
.../pdf/updates/IncrementalUpdateRecord.java | 27 ++---
.../parser/pdf/updates/IsIncrementalUpdate.java | 6 +-
.../tika/parser/pdf/updates/StartXRefOffset.java | 14 +++
.../tika/parser/pdf/PDFIncrementalUpdatesTest.java | 31 ++++-
.../org/apache/tika/parser/warc/WARCParser.java | 34 +++++-
.../apache/tika/parser/warc/WARCParserTest.java | 5 +
.../tika-parsers-standard-package/pom.xml | 14 +--
.../services/org.apache.tika.parser.Parser | 0
tika-translate/pom.xml | 2 +-
21 files changed, 282 insertions(+), 94 deletions(-)
copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-package => tika-app}/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (100%)
copy tika-batch/src/main/java/org/apache/tika/batch/fs/FSConsumersManager.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IncrementalUpdateRecord.java (65%)
copy tika-core/src/main/java/org/apache/tika/language/detect/LanguageConfidence.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IsIncrementalUpdate.java (82%)
copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-package => tika-server/tika-server-standard}/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (100%)