You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/05/03 16:14:15 UTC

[tika] branch TIKA-4017 updated (4b27622d1 -> b19502214)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4017
in repository https://gitbox.apache.org/repos/asf/tika.git


    from 4b27622d1 TIKA-4017 -- add more unit tests; don't count startxref if number component is at a literal EOF.
     add f4195789f TIKA-4018 -- improve extraction of metadata from warc (#1084)
     add f314d7172 Bump aws.version from 1.12.449 to 1.12.450
     add 7448a1091 Merge pull request #1086 from apache/dependabot/maven/aws.version-1.12.450
     add 3decc2d8f Bump azure-storage-blob from 12.21.1 to 12.22.0
     add c20784593 Merge pull request #1087 from apache/dependabot/maven/com.azure-azure-storage-blob-12.22.0
     add c53dbe041 Bump aws.version from 1.12.450 to 1.12.451
     add 4dcb51e28 Merge pull request #1088 from apache/dependabot/maven/aws.version-1.12.451
     add 43544e420 Bump maven-project-info-reports-plugin from 3.4.2 to 3.4.3
     add 2e2ae0a26 Merge pull request #1089 from apache/dependabot/maven/org.apache.maven.plugins-maven-project-info-reports-plugin-3.4.3
     add 9574b561c Bump woodstox-core from 6.5.0 to 6.5.1
     add 1b60c0a75 Merge pull request #1090 from apache/dependabot/maven/com.fasterxml.woodstox-woodstox-core-6.5.1
     add 57a539da3 Bump aws.version from 1.12.451 to 1.12.452
     add 32a45b9d9 Merge pull request #1091 from apache/dependabot/maven/aws.version-1.12.452
     add ad99ac298 Bump aws.version from 1.12.452 to 1.12.453
     add c9629adbb Merge pull request #1093 from apache/dependabot/maven/aws.version-1.12.453
     add c8a7c1a1c Bump maven-checkstyle-plugin from 3.2.1 to 3.2.2
     add edd6ccccc Merge pull request #1092 from apache/dependabot/maven/org.apache.maven.plugins-maven-checkstyle-plugin-3.2.2
     add 0c91c1120 Bump aws.version from 1.12.453 to 1.12.454
     add 3710e4740 Merge pull request #1094 from apache/dependabot/maven/aws.version-1.12.454
     add 663af05df Bump zstd-jni from 1.5.5-1 to 1.5.5-2
     add 982a70de2 Merge pull request #1096 from apache/dependabot/maven/com.github.luben-zstd-jni-1.5.5-2
     add 36db4e47f Bump jackson.version from 2.14.2 to 2.15.0
     add 7b19ebea3 Merge pull request #1095 from apache/dependabot/maven/jackson.version-2.15.0
     add 0447a0f6c Bump aws.version from 1.12.454 to 1.12.455
     add 089ad7da7 Merge pull request #1097 from apache/dependabot/maven/aws.version-1.12.455
     add 251f15d22 Bump aws.version from 1.12.455 to 1.12.456
     add f134e072e Merge pull request #1098 from apache/dependabot/maven/aws.version-1.12.456
     add 41118ed61 Bump netty-bom from 4.1.91.Final to 4.1.92.Final
     add 3f96028da Merge pull request #1099 from apache/dependabot/maven/io.netty-netty-bom-4.1.92.Final
     add 3a2194474 Bump google-cloud-storage from 2.22.0 to 2.22.1
     add d387b61db Merge pull request #1101 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.22.1
     add ba1ab4d4a Bump aws.version from 1.12.456 to 1.12.457
     add f0c8bd363 Merge pull request #1100 from apache/dependabot/maven/aws.version-1.12.457
     add 9dc6a02e9 Bump aws.version from 1.12.457 to 1.12.458
     add 5924ae03b Merge pull request #1103 from apache/dependabot/maven/aws.version-1.12.458
     add 4298b4239 Bump aws.version from 1.12.458 to 1.12.459
     add 38a71a464 Merge pull request #1104 from apache/dependabot/maven/aws.version-1.12.459
     add cffd6060e Bump junit5.version from 5.9.2 to 5.9.3
     add e5713742f Merge pull request #1102 from apache/dependabot/maven/junit5.version-5.9.3
     add 9b10b7b7f Bump aws.version from 1.12.459 to 1.12.460
     add 8f2e7e1ac Merge pull request #1105 from apache/dependabot/maven/aws.version-1.12.460
     add 78d0fa4c6 Bump aws.version from 1.12.460 to 1.12.461
     add 15f1c3434 Merge pull request #1106 from apache/dependabot/maven/aws.version-1.12.461
     add 17dc9ad12 TIKA-4022 (#1107)
     add c01c048a9 TIKA-4025 -- extract Java's ImageReader's num images into the metadata. (#1108)
     add 4686032b6 Merge remote-tracking branch 'origin/main' into TIKA-4017
     add b19502214 Add parsing of incremental updates.

No new revisions were added by this update.

Summary of changes:
 CHANGES.txt                                        |  12 +-
 tika-app/pom.xml                                   |   2 +-
 .../services/org.apache.tika.parser.Parser         |   0
 tika-core/pom.xml                                  |   2 +-
 .../main/java/org/apache/tika/metadata/PDF.java    |   2 +
 .../apache/tika/metadata/TikaCoreProperties.java   |   6 +
 tika-parent/pom.xml                                |  18 +--
 .../org/apache/tika/parser/image/ImageParser.java  |   9 +-
 .../apache/tika/parser/image/ImageParserTest.java  |   5 +
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  | 131 +++++++++++++++------
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  51 ++++++--
 .../apache/tika/parser/pdf/PDFParserConfig.java    |   5 +
 .../pdf/updates/IncrementalUpdateRecord.java       |  27 ++---
 .../parser/pdf/updates/IsIncrementalUpdate.java    |   6 +-
 .../tika/parser/pdf/updates/StartXRefOffset.java   |  14 +++
 .../tika/parser/pdf/PDFIncrementalUpdatesTest.java |  31 ++++-
 .../org/apache/tika/parser/warc/WARCParser.java    |  34 +++++-
 .../apache/tika/parser/warc/WARCParserTest.java    |   5 +
 .../tika-parsers-standard-package/pom.xml          |  14 +--
 .../services/org.apache.tika.parser.Parser         |   0
 tika-translate/pom.xml                             |   2 +-
 21 files changed, 282 insertions(+), 94 deletions(-)
 copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-package => tika-app}/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (100%)
 copy tika-batch/src/main/java/org/apache/tika/batch/fs/FSConsumersManager.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IncrementalUpdateRecord.java (65%)
 copy tika-core/src/main/java/org/apache/tika/language/detect/LanguageConfidence.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IsIncrementalUpdate.java (82%)
 copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-package => tika-server/tika-server-standard}/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (100%)