You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/06 18:14:50 UTC

[tika] 03/05: Merge remote-tracking branch 'origin/main' into main

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit fc19baf5f1ac57084dc8915cd20bb36a65b3e275
Merge: 283aa339b 4b20970cb
Author: tballison <ta...@apache.org>
AuthorDate: Thu Apr 21 07:31:58 2022 -0400

    Merge remote-tracking branch 'origin/main' into main

 CHANGES.txt                                        |  27 ++++-
 tika-bundles/tika-bundle-standard/pom.xml          |   2 +-
 .../EmbeddedDocumentExtractorFactory.java          |  18 +---
 .../tika/extractor/EmbeddedDocumentUtil.java       |  26 ++---
 .../ParsingEmbeddedDocumentExtractor.java          |   8 +-
 .../ParsingEmbeddedDocumentExtractorFactory.java   |  28 +++---
 .../java/org/apache/tika/metadata/Metadata.java    |   4 -
 .../apache/tika/metadata/TikaCoreProperties.java   |  16 +++
 .../org/apache/tika/parser/AutoDetectParser.java   |  30 +++---
 .../apache/tika/parser/AutoDetectParserConfig.java |  41 +++++++-
 .../org/apache/tika/parser/CompositeParser.java    |  39 +++++++-
 .../tika/sax/ContentHandlerDecoratorFactory.java   |  18 +---
 .../java/org/apache/tika/sax/StandardsText.java    |   2 +-
 .../java/org/apache/tika/utils/ParserUtils.java    |  15 ++-
 .../metadatafilter/OpenNLPMetadataFilter.java      |  49 +++++++++
 .../optimaize/OptimaizeLangDetector.java           |  13 ++-
 .../metadatafilter/OptimaizeMetadataFilter.java    |  49 +++++++++
 tika-parent/pom.xml                                |  17 ++--
 .../detect/microsoft/POIFSContainerDetector.java   |  66 ++++++++++---
 .../parser/microsoft/AbstractPOIFSExtractor.java   |  33 ++++---
 .../tika/parser/microsoft/ExcelExtractor.java      |   2 +-
 .../tika/parser/microsoft/HSLFExtractor.java       |   9 +-
 .../tika/parser/microsoft/JackcessExtractor.java   |   2 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |  24 ++++-
 .../tika/parser/microsoft/OutlookExtractor.java    |   2 +-
 .../tika/parser/microsoft/WordExtractor.java       |   2 +-
 .../microsoft/onenote/OneNoteTreeWalker.java       |   6 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |   2 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |   2 +-
 .../tika/parser/microsoft/xml/WordMLParser.java    |   8 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  13 +--
 .../tika/parser/pdf/ImageGraphicsEngine.java       |   4 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |   2 +-
 .../tika/parser/AutoDetectParserConfigTest.java    |  67 +++++++++++++
 .../tika/parser/microsoft/XML2003ParserTest.java   |   4 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |  14 +++
 .../tika/parser/ocr/TesseractOCRParserTest.java    |   5 +
 .../UpcasingContentHandlerDecoratorFactory.java    |  25 ++---
 .../resources/configs/tika-config-no-names.xml     |  33 +++++++
 ...ka-config-upcasing-custom-handler-decorator.xml |  29 ++++++
 .../resources/configs/tika-config-with-names.xml   |  33 +++++++
 tika-pipes/pom.xml                                 |   4 +-
 .../org/apache/tika/client/HttpClientFactory.java  |  15 +--
 .../server/core/DefaultInputStreamFactory.java     |   7 ++
 .../tika/server/core/FetcherStreamFactory.java     |  30 ++++--
 .../tika/server/core/InputStreamFactory.java       |  15 +++
 .../tika/server/core/TikaServerWatchDog.java       |   8 +-
 .../server/core/resource/DetectorResource.java     |   5 +-
 .../server/core/resource/MetadataResource.java     |   5 +-
 .../core/resource/RecursiveMetadataResource.java   |   2 +-
 .../tika/server/core/resource/TikaResource.java    |  15 +--
 .../server/core/resource/UnpackerResource.java     |   6 +-
 .../org/apache/tika/server/core/CXFTestBase.java   |   9 +-
 .../org/apache/tika/server/core/TikaPipesTest.java |   3 +-
 .../tika/server/core/TikaResourceFetcherTest.java  | 110 +++++++++++++++++++++
 .../tika-config-server-fetcher-template.xml        |  38 +++++++
 tika-server/tika-server-eval/pom.xml               |   1 +
 tika-server/tika-server-standard/pom.xml           |  22 +++--
 .../standard/resource/XMPMetadataResource.java     |   3 +-
 .../apache/tika/server/standard/FetcherTest.java   |  12 +--
 ...herTest.java => OpenNLPMetadataFilterTest.java} |  74 +++++++-------
 ...rTest.java => OptimaizeMetadataFilterTest.java} |  73 +++++++-------
 .../apache/tika/server/standard/TikaPipesTest.java |   6 +-
 .../tika/server/standard/TikaResourceTest.java     |   5 +
 .../tika-config-langdetect-opennlp-filter.xml      |  32 ++++++
 .../tika-config-langdetect-optimaize-filter.xml    |  32 ++++++
 66 files changed, 1032 insertions(+), 289 deletions(-)