You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/06 18:14:51 UTC

[tika] 04/05: Merge remote-tracking branch 'origin/main' into main

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ba9fdc1b507ea85fb261912a890d8eb45514d031
Merge: fc19baf5f 91d389f06
Author: tballison <ta...@apache.org>
AuthorDate: Fri May 6 13:09:07 2022 -0400

    Merge remote-tracking branch 'origin/main' into main
    
    # Conflicts:
    #       tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java

 ...n-jdk11-build.yml => branch_1x-jdk11-build.yml} |   4 +-
 ...in-jdk11-build.yml => branch_1x-jdk8-build.yml} |   8 +-
 .github/workflows/main-jdk11-build.yml             |   2 +-
 .github/workflows/main-jdk17-build.yml             |   2 +-
 .github/workflows/main-jdk8-build.yml              |   2 +-
 CHANGES.txt                                        |  21 +-
 pom.xml                                            |   6 +-
 tika-app/pom.xml                                   |   2 +-
 tika-batch/pom.xml                                 |   2 +-
 tika-bom/pom.xml                                   | 128 ++++++-------
 tika-bundles/pom.xml                               |   4 +-
 tika-bundles/tika-bundle-standard/pom.xml          |   4 +-
 tika-core/pom.xml                                  |   2 +-
 .../java/org/apache/tika/config/TikaConfig.java    | 139 ++++++++++++--
 .../tika/extractor/EmbeddedDocumentUtil.java       |   4 +-
 .../main/java/org/apache/tika/io/EndianUtils.java  |  19 ++
 .../java/org/apache/tika/metadata/Rendering.java   |  17 +-
 .../apache/tika/metadata/TikaCoreProperties.java   |   5 +-
 .../metadata/filter/DefaultMetadataFilter.java     |   6 +-
 .../java/org/apache/tika/parser/DefaultParser.java |  37 +++-
 .../org/apache/tika/parser/RenderingParser.java    |  16 +-
 .../tika/parser/external/ExternalParser.java       |   3 +
 .../tika/parser/external2/ExternalParser.java      |  15 +-
 .../apache/tika/renderer/CompositeRenderer.java    | 102 ++++++++++
 .../tika/renderer/PageBasedRenderResults.java      |  50 +++++
 .../org/apache/tika/renderer/PageRangeRequest.java |  60 ++++++
 .../org/apache/tika/renderer/RenderRequest.java    |  21 +-
 .../org/apache/tika/renderer/RenderResult.java     |  62 ++++++
 .../RenderResults.java}                            |  41 ++--
 .../java/org/apache/tika/renderer/Renderer.java    |  62 ++++++
 .../org/apache/tika/renderer/RenderingState.java   |  18 +-
 .../org/apache/tika/renderer/RenderingTracker.java |  19 +-
 .../org/apache/tika/mime/tika-mimetypes.xml        |  21 ++
 .../java/org/apache/tika/io/EndianUtilsTest.java   |  18 ++
 tika-eval/pom.xml                                  |   2 +-
 tika-eval/tika-eval-app/pom.xml                    |   2 +-
 tika-eval/tika-eval-core/pom.xml                   |   2 +-
 tika-example/pom.xml                               |   2 +-
 tika-fuzzing/pom.xml                               |   2 +-
 tika-integration-tests/pom.xml                     |   2 +-
 .../pom.xml                                        |   2 +-
 .../opensearch/tests/TikaPipesOpenSearchTest.java  |   2 +-
 .../tika-pipes-s3-integration-tests/pom.xml        |   2 +-
 .../tika-pipes-solr-integration-tests/pom.xml      |   2 +-
 .../tika/pipes/solr/tests/TikaPipesSolr8Test.java  |   2 +-
 .../tika-resource-loading-tests}/pom.xml           |  31 +--
 .../org/apache/custom/parser/CustomParserTest.java |  43 +++++
 .../org/apache/custom/parser/MyCustomParser.java   |  51 +++++
 .../services/org.apache.tika.parser.Parser         |   5 +-
 tika-java7/pom.xml                                 |   2 +-
 tika-langdetect/pom.xml                            |   2 +-
 tika-langdetect/tika-langdetect-lingo24/pom.xml    |   2 +-
 tika-langdetect/tika-langdetect-mitll-text/pom.xml |   2 +-
 tika-langdetect/tika-langdetect-opennlp/pom.xml    |   2 +-
 tika-langdetect/tika-langdetect-optimaize/pom.xml  |   2 +-
 .../tika-langdetect-test-commons/pom.xml           |   2 +-
 tika-langdetect/tika-langdetect-tika/pom.xml       |   2 +-
 tika-parent/pom.xml                                |  37 ++--
 tika-parsers/pom.xml                               |   2 +-
 tika-parsers/tika-parsers-extended/pom.xml         |   2 +-
 .../tika-parser-scientific-module/pom.xml          |   2 +-
 .../tika-parser-scientific-package/pom.xml         |   2 +-
 .../tika-parser-sqlite3-module/pom.xml             |   2 +-
 .../tika-parser-sqlite3-package/pom.xml            |   2 +-
 .../pom.xml                                        |   2 +-
 tika-parsers/tika-parsers-ml/pom.xml               |   2 +-
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +-
 tika-parsers/tika-parsers-ml/tika-dl/pom.xml       |   2 +-
 .../tika-parser-advancedmedia-module/pom.xml       |   2 +-
 .../tika-parser-advancedmedia-package/pom.xml      |   9 +-
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |   2 +-
 .../tika-parser-nlp-package/pom.xml                |   9 +-
 .../tika-parsers-ml/tika-transcribe-aws/pom.xml    |   2 +-
 tika-parsers/tika-parsers-standard/pom.xml         |   2 +-
 .../tika-parsers-standard-modules/pom.xml          |   2 +-
 .../tika-parser-apple-module/pom.xml               |   2 +-
 .../tika-parser-audiovideo-module/pom.xml          |   2 +-
 .../tika-parser-cad-module/pom.xml                 |   8 +-
 .../org/apache/tika/parser/dgn/DGN8Parser.java     |  88 +++++++++
 .../java/org/apache/tika/parser/dwg/DWGParser.java |  10 +-
 .../services/org.apache.tika.parser.Parser         |   1 +
 .../apache/tika/parser/dgn/DGN8ParserTest.java}    |  33 ++--
 .../org/apache/tika/parser/dwg/DWGParserTest.java  |  25 ++-
 .../src/test/resources/test-documents/testDGN7.dgn | Bin 0 -> 33792 bytes
 .../src/test/resources/test-documents/testDGN8.dgn | Bin 0 -> 28160 bytes
 .../resources/test-documents/testDWG-AC1027.dwg    | Bin 0 -> 265260 bytes
 .../resources/test-documents/testDWG-AC1032.dwg    | Bin 0 -> 158593 bytes
 .../tika-parser-code-module/pom.xml                |   2 +-
 .../tika-parser-crypto-module/pom.xml              |   2 +-
 .../tika-parser-digest-commons/pom.xml             |   2 +-
 .../tika-parser-font-module/pom.xml                |   2 +-
 .../tika-parser-html-commons/pom.xml               |   2 +-
 .../tika-parser-html-module/pom.xml                |   2 +-
 .../tika-parser-image-module/pom.xml               |   2 +-
 .../tika-parser-jdbc-commons/pom.xml               |   2 +-
 .../tika-parser-mail-commons/pom.xml               |   2 +-
 .../tika-parser-mail-module/pom.xml                |   2 +-
 .../tika-parser-microsoft-module/pom.xml           |   2 +-
 .../detect/microsoft/POIFSContainerDetector.java   |   5 +
 .../tika/parser/microsoft/HSLFExtractor.java       |  66 ++++---
 .../apache/tika/parser/microsoft/OfficeParser.java |   4 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |  42 ++++
 .../parser/microsoft/PowerPointParserTest.java     |   8 +-
 .../tika-parser-miscoffice-module/pom.xml          |   2 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |   2 +-
 .../org/apache/tika/parser/odf/ODFParserTest.java  |   2 +-
 .../tika-parser-news-module/pom.xml                |   2 +-
 .../tika-parser-ocr-module/pom.xml                 |   2 +-
 .../tika-parser-pdf-module/pom.xml                 |   9 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  | 212 +++++++++++++++------
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |  15 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |  18 +-
 .../tika/parser/pdf/PDFMarkedContent2XHTML.java    |  13 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java | 163 +++++++++++++---
 .../apache/tika/parser/pdf/PDFParserConfig.java    | 212 +++++++++++----------
 .../tika/parser/pdf/TextOnlyPDFRenderer.java       | 106 +++++++++++
 .../apache/tika/renderer/pdf/MuPDFRenderer.java    | 149 +++++++++++++++
 .../tika/renderer/pdf/PDDocumentRenderer.java      |  19 +-
 .../apache/tika/renderer/pdf/PDFBoxRenderer.java   | 198 +++++++++++++++++++
 .../tika/renderer/pdf/PDFRenderingState.java       |  31 ++-
 .../apache/tika/parser/pdf/PDFRenderingTest.java   | 109 +++++++++++
 .../tika/parser/pdf/tika-rendering-config.xml      |  25 +--
 .../tika-parser-pkg-module/pom.xml                 |   2 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |  21 +-
 .../tika-parser-text-module/pom.xml                |   2 +-
 .../tika-parser-webarchive-module/pom.xml          |   2 +-
 .../tika-parser-xml-module/pom.xml                 |   2 +-
 .../tika-parser-xmp-commons/pom.xml                |   2 +-
 .../tika-parser-zip-commons/pom.xml                |   2 +-
 .../tika-parsers-standard-package/pom.xml          |   2 +-
 .../tika/detect/TestContainerAwareDetector.java    |   1 +
 .../java/org/apache/tika/mime/TestMimeTypes.java   |   7 +
 .../apache/tika/parser/crypto/TSDParserTest.java   |   8 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  27 +++
 .../apache/tika/utils/ServiceLoaderUtilsTest.java  |   4 +-
 .../configs/tika-rendering-mupdf-config.xml        |  25 +--
 tika-pipes/pom.xml                                 |   6 +-
 tika-pipes/tika-emitters/pom.xml                   |   2 +-
 .../tika-emitters/tika-emitter-az-blob/pom.xml     |   2 +-
 tika-pipes/tika-emitters/tika-emitter-fs/pom.xml   |   2 +-
 tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml  |   2 +-
 .../tika-emitters/tika-emitter-opensearch/pom.xml  |   2 +-
 tika-pipes/tika-emitters/tika-emitter-s3/pom.xml   |   2 +-
 tika-pipes/tika-emitters/tika-emitter-solr/pom.xml |   2 +-
 tika-pipes/tika-fetchers/pom.xml                   |   2 +-
 .../tika-fetchers/tika-fetcher-az-blob/pom.xml     |   2 +-
 tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml  |   2 +-
 tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml |   2 +-
 tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml   |   2 +-
 tika-pipes/tika-httpclient-commons/pom.xml         |   2 +-
 tika-pipes/tika-pipes-iterators/pom.xml            |   2 +-
 .../tika-pipes-iterator-az-blob/pom.xml            |   2 +-
 .../tika-pipes-iterator-csv/pom.xml                |   2 +-
 .../tika-pipes-iterator-gcs/pom.xml                |   2 +-
 .../tika-pipes-iterator-jdbc/pom.xml               |   2 +-
 .../tika-pipes-iterator-s3/pom.xml                 |   2 +-
 .../tika-pipes-iterator-solr/pom.xml               |   2 +-
 tika-serialization/pom.xml                         |   2 +-
 tika-server/pom.xml                                |   2 +-
 tika-server/tika-server-client/pom.xml             |   2 +-
 tika-server/tika-server-core/pom.xml               |   2 +-
 .../apache/tika/server/core/TikaServerConfig.java  |  46 ++++-
 .../apache/tika/server/core/TikaServerProcess.java |  56 +++++-
 .../tika/server/core/TikaServerWatchDog.java       |   2 -
 .../org/apache/tika/server/core/TlsConfig.java     | 171 +++++++++++++++++
 .../tika/server/core/IntegrationTestBase.java      |   7 +-
 .../tika/server/core/TikaResourceFetcherTest.java  |   4 +-
 .../tika/server/core/TikaServerConfigTest.java     |  48 +++++
 .../server/core/TikaServerIntegrationTest.java     | 182 ++++++++++++++++++
 .../tika-config-server-fetcher-template.xml        |   4 +-
 ...=> tika-config-server-tls-one-way-template.xml} |  31 +--
 ...=> tika-config-server-tls-two-way-template.xml} |  34 ++--
 ...her-template.xml => tika-config-server-tls.xml} |  29 +--
 .../src/test/resources/ssl-keys/README.txt         |  28 +++
 .../resources/ssl-keys/tika-client-keystore.p12    | Bin 0 -> 2505 bytes
 .../resources/ssl-keys/tika-client-truststore.p12  | Bin 0 -> 3429 bytes
 .../src/test/resources/ssl-keys/tika-client.crt    | Bin 0 -> 789 bytes
 .../resources/ssl-keys/tika-server-keystore.p12    | Bin 0 -> 2505 bytes
 .../resources/ssl-keys/tika-server-truststore.p12  | Bin 0 -> 3429 bytes
 .../src/test/resources/ssl-keys/tika-server.crt    | Bin 0 -> 789 bytes
 tika-server/tika-server-eval/pom.xml               |   8 +-
 tika-server/tika-server-standard/pom.xml           |   2 +-
 tika-translate/pom.xml                             |   2 +-
 tika-xmp/pom.xml                                   |   2 +-
 184 files changed, 2880 insertions(+), 717 deletions(-)