You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by nd...@apache.org on 2024/03/29 07:30:52 UTC
(tika) 01/01: Merge branch 'main' of github.com:apache/tika into TIKA-4181-grpc
This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch TIKA-4181-grpc
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 100ef9c3063e49106a2c7fbff4942bbe9edc7042
Merge: 322452021 941f8f26c
Author: Nicholas DiPiazza <nd...@apache.org>
AuthorDate: Fri Mar 29 02:30:30 2024 -0500
Merge branch 'main' of github.com:apache/tika into TIKA-4181-grpc
CHANGES.txt | 2 +
tika-app/pom.xml | 1 +
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 2 +-
.../java/org/apache/tika/cli/TikaCLIAsyncTest.java | 89 +++++++
.../test/java/org/apache/tika/cli/TikaCLITest.java | 59 +---
tika-batch/pom.xml | 3 +
tika-core/src/main/java/org/apache/tika/Tika.java | 4 +
.../org/apache/tika/detect/AutoDetectReader.java | 38 +--
.../tika/detect/CompositeEncodingDetector.java | 7 +
.../AbstractEmbeddedDocumentBytesHandler.java | 69 +++++
.../tika/extractor/BasicEmbeddedBytesSelector.java | 77 ++++++
.../BasicEmbeddedDocumentBytesHandler.java | 58 ++++
.../tika/extractor/EmbeddedBytesSelector.java | 31 +--
.../EmbeddedDocumentByteStoreExtractorFactory.java | 36 +--
.../extractor/EmbeddedDocumentBytesHandler.java | 32 +--
.../ParsingEmbeddedDocumentExtractor.java | 10 +-
.../apache/tika/extractor/RUnpackExtractor.java | 183 +++++++++++++
.../tika/extractor/RUnpackExtractorFactory.java | 111 ++++++++
.../org/apache/tika/io/BoundedInputStream.java | 31 ++-
.../main/java/org/apache/tika/metadata/IPTC.java | 8 +
.../main/java/org/apache/tika/metadata/PDF.java | 6 +
.../apache/tika/metadata/TikaCoreProperties.java | 20 ++
.../main/java/org/apache/tika/mime/MimeTypes.java | 4 +-
.../org/apache/tika/parser/AbstractParser.java | 1 +
.../org/apache/tika/parser/AutoDetectParser.java | 11 +-
.../apache/tika/parser/AutoDetectParserConfig.java | 4 +-
.../org/apache/tika/parser/ParserDecorator.java | 1 +
.../apache/tika/parser/RecursiveParserWrapper.java | 2 +
.../parser/multiple/AbstractMultipleParser.java | 1 +
.../java/org/apache/tika/pipes/FetchEmitTuple.java | 52 +++-
.../java/org/apache/tika/pipes/PipesServer.java | 296 +++++++++++++++------
.../extractor/EmbeddedDocumentBytesConfig.java | 167 ++++++++++++
.../EmittingEmbeddedDocumentBytesHandler.java | 73 +++++
.../org/apache/tika/mime/tika-mimetypes.xml | 89 ++++++-
.../java/org/apache/tika/TikaDetectionTest.java | 2 +-
.../tika/parser/AutoDetectParserConfigTest.java | 72 +++++
.../org/apache/tika/parser/mock/MockParser.java | 26 +-
.../org/apache/tika/pipes/PipesServerTest.java | 120 ++++++++-
...rocessorTest.java => AsyncChaosMonkeyTest.java} | 2 +-
.../config/TIKA-4207-embedded-bytes-config.xml | 13 +-
.../apache/tika/pipes/TIKA-4207-limit-bytes.xml | 19 +-
.../resources/org/apache/tika/pipes/TIKA-4207.xml | 19 +-
tika-eval/tika-eval-app/pom.xml | 7 +-
.../org/apache/tika/eval/app/AbstractProfiler.java | 17 +-
.../org/apache/tika/eval/app/ExtractProfiler.java | 4 +
.../java/org/apache/tika/eval/app/db/Cols.java | 3 +
tika-eval/tika-eval-core/pom.xml | 1 +
.../eval/core/metadata/TikaEvalMetadataFilter.java | 4 +
.../core/metadata/TikaEvalMetadataFilterTest.java | 1 +
tika-fuzzing/pom.xml | 1 +
tika-java7/pom.xml | 1 +
tika-parent/pom.xml | 102 +++----
.../apache/tika/parser/geopkg/GeoPkgDBParser.java | 54 ++++
.../apache/tika/parser/geopkg/GeoPkgParser.java | 127 +++++++++
.../GeoPkgTableReader.java} | 59 ++--
.../tika/parser/sqlite3/SQLite3DBParser.java | 2 +-
.../tika/parser/sqlite3/SQLite3TableReader.java | 2 +-
.../services/org.apache.tika.parser.Parser | 1 +
.../tika-parsers-ml/tika-age-recogniser/pom.xml | 2 +-
.../tika/parser/iwork/IWorkPackageParser.java | 47 ++--
.../apache/tika/parser/html/HtmlParserTest.java | 2 +-
.../detect/microsoft/ooxml/OPCPackageDetector.java | 47 ++--
.../apache/tika/parser/microsoft/WMFParser.java | 3 +-
.../tika/parser/microsoft/chm/ChmCommons.java | 11 +-
.../tika/parser/microsoft/chm/ChmExtractor.java | 3 +-
.../tika/parser/microsoft/chm/ChmPmgiHeader.java | 2 +-
.../ooxml/XSLFPowerPointExtractorDecorator.java | 3 +-
.../tika/parser/microsoft/chm/TestChmLzxState.java | 3 +-
.../apache/tika/detect/ole/MiscOLEDetector.java | 4 +-
.../apache/tika/parser/epub/EncryptionParser.java | 88 ------
.../org/apache/tika/parser/epub/EpubParser.java | 193 +++++++++++---
.../apache/tika/parser/iptc/IptcAnpaParser.java | 1 +
.../apache/tika/parser/ocr/TesseractOCRParser.java | 20 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 6 +
.../org/apache/tika/parser/pdf/OCRPageCounter.java | 31 +--
.../java/org/apache/tika/parser/pdf/PDFParser.java | 6 +
.../org/apache/tika/parser/pdf/XFAExtractor.java | 3 +
.../org/apache/tika/parser/pdf/PDFParserTest.java | 12 +-
.../detect/gzip/GZipSpecializationDetector.java | 4 +
.../org/apache/tika/parser/pkg/PackageParser.java | 7 +-
.../org/apache/tika/parser/txt/BOMDetector.java | 93 +++++++
.../apache/tika/parser/txt/BOMDetectorTest.java | 91 +++++++
.../org/apache/tika/parser/txt/TXTParserTest.java | 2 +
.../org/apache/tika/parser/warc/WARCParser.java | 14 +-
.../apache/tika/parser/warc/WARCParserTest.java | 31 ++-
.../test/resources/test-documents/example.arc.gz | Bin 0 -> 1027 bytes
.../src/test/resources/test-documents/testARC.arc | 50 ++++
.../apache/tika/parser/xml/MetadataHandler.java | 4 +
.../tika/detect/TestContainerAwareDetector.java | 5 +
.../java/org/apache/tika/mime/TestMimeTypes.java | 6 +
.../tika/parser/RecursiveParserWrapperTest.java | 5 +-
.../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 9 +
.../tika/parser/ocr/TesseractOCRParserTest.java | 9 +
.../apache/tika/parser/pkg/Seven7ParserTest.java | 12 +-
.../resources/configs/tika-config-no-names.xml | 2 +-
.../resources/configs/tika-config-with-names.xml | 2 +-
.../src/test/resources/test-documents/test3mf.3mf | Bin 0 -> 28243 bytes
.../resources/test-documents/testSTL-ascii.stl | 16 ++
.../resources/test-documents/testSTL-binary.stl | Bin 0 -> 160 bytes
tika-pipes/tika-async-cli/pom.xml | 7 +
.../apache/tika/async/cli/AsyncProcessorTest.java | 140 ++++++++++
.../apache/tika/async/cli/TikaAsyncCLITest.java | 2 +-
.../test/resources/configs/TIKA-4207-emitter.xml | 28 +-
.../resources/{ => configs}/tika-config-broken.xml | 0
.../basic_embedded.xml} | 29 +-
tika-pipes/tika-pipes-iterators/pom.xml | 1 +
.../tika-pipes-iterator-json}/pom.xml | 43 ++-
.../pipesiterator/json/JsonPipesIterator.java | 65 +++++
.../pipesiterator/json/TestJsonPipesIterator.java | 85 ++++++
.../test-documents/test-with-embedded-bytes.json | 100 +++++++
.../src/test/resources/test-documents/test.json | 100 +++++++
.../pipes/reporters/jdbc/JDBCPipesReporter.java | 52 ++--
.../metadata/serialization/JsonFetchEmitTuple.java | 71 ++++-
.../serialization/JsonFetchEmitTupleTest.java | 20 ++
tika-server/tika-server-core/pom.xml | 10 +-
.../apache/tika/server/core/TikaServerProcess.java | 2 +-
.../tika/server/core/resource/AsyncResource.java | 32 ++-
.../tika/server/core/resource/TikaResource.java | 2 +-
.../apache/tika/server/core/TikaVersionTest.java | 2 +-
.../apache/tika/server/core/TikaWelcomeTest.java | 4 +-
.../apache/tika/server/standard/TikaPipesTest.java | 93 +++++++
tika-translate/pom.xml | 1 +
tika-xmp/pom.xml | 1 +
123 files changed, 3290 insertions(+), 686 deletions(-)