You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/06 18:14:50 UTC
[tika] 03/05: Merge remote-tracking branch 'origin/main' into main
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit fc19baf5f1ac57084dc8915cd20bb36a65b3e275
Merge: 283aa339b 4b20970cb
Author: tballison <ta...@apache.org>
AuthorDate: Thu Apr 21 07:31:58 2022 -0400
Merge remote-tracking branch 'origin/main' into main
CHANGES.txt | 27 ++++-
tika-bundles/tika-bundle-standard/pom.xml | 2 +-
.../EmbeddedDocumentExtractorFactory.java | 18 +---
.../tika/extractor/EmbeddedDocumentUtil.java | 26 ++---
.../ParsingEmbeddedDocumentExtractor.java | 8 +-
.../ParsingEmbeddedDocumentExtractorFactory.java | 28 +++---
.../java/org/apache/tika/metadata/Metadata.java | 4 -
.../apache/tika/metadata/TikaCoreProperties.java | 16 +++
.../org/apache/tika/parser/AutoDetectParser.java | 30 +++---
.../apache/tika/parser/AutoDetectParserConfig.java | 41 +++++++-
.../org/apache/tika/parser/CompositeParser.java | 39 +++++++-
.../tika/sax/ContentHandlerDecoratorFactory.java | 18 +---
.../java/org/apache/tika/sax/StandardsText.java | 2 +-
.../java/org/apache/tika/utils/ParserUtils.java | 15 ++-
.../metadatafilter/OpenNLPMetadataFilter.java | 49 +++++++++
.../optimaize/OptimaizeLangDetector.java | 13 ++-
.../metadatafilter/OptimaizeMetadataFilter.java | 49 +++++++++
tika-parent/pom.xml | 17 ++--
.../detect/microsoft/POIFSContainerDetector.java | 66 ++++++++++---
.../parser/microsoft/AbstractPOIFSExtractor.java | 33 ++++---
.../tika/parser/microsoft/ExcelExtractor.java | 2 +-
.../tika/parser/microsoft/HSLFExtractor.java | 9 +-
.../tika/parser/microsoft/JackcessExtractor.java | 2 +-
.../apache/tika/parser/microsoft/OfficeParser.java | 24 ++++-
.../tika/parser/microsoft/OutlookExtractor.java | 2 +-
.../tika/parser/microsoft/WordExtractor.java | 2 +-
.../microsoft/onenote/OneNoteTreeWalker.java | 6 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 2 +-
.../parser/microsoft/pst/OutlookPSTParser.java | 2 +-
.../tika/parser/microsoft/xml/WordMLParser.java | 8 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 13 +--
.../tika/parser/pdf/ImageGraphicsEngine.java | 4 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 2 +-
.../tika/parser/AutoDetectParserConfigTest.java | 67 +++++++++++++
.../tika/parser/microsoft/XML2003ParserTest.java | 4 +-
.../tika/parser/microsoft/rtf/RTFParserTest.java | 14 +++
.../tika/parser/ocr/TesseractOCRParserTest.java | 5 +
.../UpcasingContentHandlerDecoratorFactory.java | 25 ++---
.../resources/configs/tika-config-no-names.xml | 33 +++++++
...ka-config-upcasing-custom-handler-decorator.xml | 29 ++++++
.../resources/configs/tika-config-with-names.xml | 33 +++++++
tika-pipes/pom.xml | 4 +-
.../org/apache/tika/client/HttpClientFactory.java | 15 +--
.../server/core/DefaultInputStreamFactory.java | 7 ++
.../tika/server/core/FetcherStreamFactory.java | 30 ++++--
.../tika/server/core/InputStreamFactory.java | 15 +++
.../tika/server/core/TikaServerWatchDog.java | 8 +-
.../server/core/resource/DetectorResource.java | 5 +-
.../server/core/resource/MetadataResource.java | 5 +-
.../core/resource/RecursiveMetadataResource.java | 2 +-
.../tika/server/core/resource/TikaResource.java | 15 +--
.../server/core/resource/UnpackerResource.java | 6 +-
.../org/apache/tika/server/core/CXFTestBase.java | 9 +-
.../org/apache/tika/server/core/TikaPipesTest.java | 3 +-
.../tika/server/core/TikaResourceFetcherTest.java | 110 +++++++++++++++++++++
.../tika-config-server-fetcher-template.xml | 38 +++++++
tika-server/tika-server-eval/pom.xml | 1 +
tika-server/tika-server-standard/pom.xml | 22 +++--
.../standard/resource/XMPMetadataResource.java | 3 +-
.../apache/tika/server/standard/FetcherTest.java | 12 +--
...herTest.java => OpenNLPMetadataFilterTest.java} | 74 +++++++-------
...rTest.java => OptimaizeMetadataFilterTest.java} | 73 +++++++-------
.../apache/tika/server/standard/TikaPipesTest.java | 6 +-
.../tika/server/standard/TikaResourceTest.java | 5 +
.../tika-config-langdetect-opennlp-filter.xml | 32 ++++++
.../tika-config-langdetect-optimaize-filter.xml | 32 ++++++
66 files changed, 1032 insertions(+), 289 deletions(-)