You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@tika.apache.org by nd...@apache.org on 2024/03/29 07:30:52 UTC

(tika) 01/01: Merge branch 'main' of github.com:apache/tika into TIKA-4181-grpc

This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4181-grpc
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 100ef9c3063e49106a2c7fbff4942bbe9edc7042
Merge: 322452021 941f8f26c
Author: Nicholas DiPiazza <nd...@apache.org>
AuthorDate: Fri Mar 29 02:30:30 2024 -0500

    Merge branch 'main' of github.com:apache/tika into TIKA-4181-grpc

 CHANGES.txt                                        |   2 +
 tika-app/pom.xml                                   |   1 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   2 +-
 .../java/org/apache/tika/cli/TikaCLIAsyncTest.java |  89 +++++++
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  59 +---
 tika-batch/pom.xml                                 |   3 +
 tika-core/src/main/java/org/apache/tika/Tika.java  |   4 +
 .../org/apache/tika/detect/AutoDetectReader.java   |  38 +--
 .../tika/detect/CompositeEncodingDetector.java     |   7 +
 .../AbstractEmbeddedDocumentBytesHandler.java      |  69 +++++
 .../tika/extractor/BasicEmbeddedBytesSelector.java |  77 ++++++
 .../BasicEmbeddedDocumentBytesHandler.java         |  58 ++++
 .../tika/extractor/EmbeddedBytesSelector.java      |  31 +--
 .../EmbeddedDocumentByteStoreExtractorFactory.java |  36 +--
 .../extractor/EmbeddedDocumentBytesHandler.java    |  32 +--
 .../ParsingEmbeddedDocumentExtractor.java          |  10 +-
 .../apache/tika/extractor/RUnpackExtractor.java    | 183 +++++++++++++
 .../tika/extractor/RUnpackExtractorFactory.java    | 111 ++++++++
 .../org/apache/tika/io/BoundedInputStream.java     |  31 ++-
 .../main/java/org/apache/tika/metadata/IPTC.java   |   8 +
 .../main/java/org/apache/tika/metadata/PDF.java    |   6 +
 .../apache/tika/metadata/TikaCoreProperties.java   |  20 ++
 .../main/java/org/apache/tika/mime/MimeTypes.java  |   4 +-
 .../org/apache/tika/parser/AbstractParser.java     |   1 +
 .../org/apache/tika/parser/AutoDetectParser.java   |  11 +-
 .../apache/tika/parser/AutoDetectParserConfig.java |   4 +-
 .../org/apache/tika/parser/ParserDecorator.java    |   1 +
 .../apache/tika/parser/RecursiveParserWrapper.java |   2 +
 .../parser/multiple/AbstractMultipleParser.java    |   1 +
 .../java/org/apache/tika/pipes/FetchEmitTuple.java |  52 +++-
 .../java/org/apache/tika/pipes/PipesServer.java    | 296 +++++++++++++++------
 .../extractor/EmbeddedDocumentBytesConfig.java     | 167 ++++++++++++
 .../EmittingEmbeddedDocumentBytesHandler.java      |  73 +++++
 .../org/apache/tika/mime/tika-mimetypes.xml        |  89 ++++++-
 .../java/org/apache/tika/TikaDetectionTest.java    |   2 +-
 .../tika/parser/AutoDetectParserConfigTest.java    |  72 +++++
 .../org/apache/tika/parser/mock/MockParser.java    |  26 +-
 .../org/apache/tika/pipes/PipesServerTest.java     | 120 ++++++++-
 ...rocessorTest.java => AsyncChaosMonkeyTest.java} |   2 +-
 .../config/TIKA-4207-embedded-bytes-config.xml     |  13 +-
 .../apache/tika/pipes/TIKA-4207-limit-bytes.xml    |  19 +-
 .../resources/org/apache/tika/pipes/TIKA-4207.xml  |  19 +-
 tika-eval/tika-eval-app/pom.xml                    |   7 +-
 .../org/apache/tika/eval/app/AbstractProfiler.java |  17 +-
 .../org/apache/tika/eval/app/ExtractProfiler.java  |   4 +
 .../java/org/apache/tika/eval/app/db/Cols.java     |   3 +
 tika-eval/tika-eval-core/pom.xml                   |   1 +
 .../eval/core/metadata/TikaEvalMetadataFilter.java |   4 +
 .../core/metadata/TikaEvalMetadataFilterTest.java  |   1 +
 tika-fuzzing/pom.xml                               |   1 +
 tika-java7/pom.xml                                 |   1 +
 tika-parent/pom.xml                                | 102 +++----
 .../apache/tika/parser/geopkg/GeoPkgDBParser.java  |  54 ++++
 .../apache/tika/parser/geopkg/GeoPkgParser.java    | 127 +++++++++
 .../GeoPkgTableReader.java}                        |  59 ++--
 .../tika/parser/sqlite3/SQLite3DBParser.java       |   2 +-
 .../tika/parser/sqlite3/SQLite3TableReader.java    |   2 +-
 .../services/org.apache.tika.parser.Parser         |   1 +
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +-
 .../tika/parser/iwork/IWorkPackageParser.java      |  47 ++--
 .../apache/tika/parser/html/HtmlParserTest.java    |   2 +-
 .../detect/microsoft/ooxml/OPCPackageDetector.java |  47 ++--
 .../apache/tika/parser/microsoft/WMFParser.java    |   3 +-
 .../tika/parser/microsoft/chm/ChmCommons.java      |  11 +-
 .../tika/parser/microsoft/chm/ChmExtractor.java    |   3 +-
 .../tika/parser/microsoft/chm/ChmPmgiHeader.java   |   2 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java    |   3 +-
 .../tika/parser/microsoft/chm/TestChmLzxState.java |   3 +-
 .../apache/tika/detect/ole/MiscOLEDetector.java    |   4 +-
 .../apache/tika/parser/epub/EncryptionParser.java  |  88 ------
 .../org/apache/tika/parser/epub/EpubParser.java    | 193 +++++++++++---
 .../apache/tika/parser/iptc/IptcAnpaParser.java    |   1 +
 .../apache/tika/parser/ocr/TesseractOCRParser.java |  20 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |   6 +
 .../org/apache/tika/parser/pdf/OCRPageCounter.java |  31 +--
 .../java/org/apache/tika/parser/pdf/PDFParser.java |   6 +
 .../org/apache/tika/parser/pdf/XFAExtractor.java   |   3 +
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  12 +-
 .../detect/gzip/GZipSpecializationDetector.java    |   4 +
 .../org/apache/tika/parser/pkg/PackageParser.java  |   7 +-
 .../org/apache/tika/parser/txt/BOMDetector.java    |  93 +++++++
 .../apache/tika/parser/txt/BOMDetectorTest.java    |  91 +++++++
 .../org/apache/tika/parser/txt/TXTParserTest.java  |   2 +
 .../org/apache/tika/parser/warc/WARCParser.java    |  14 +-
 .../apache/tika/parser/warc/WARCParserTest.java    |  31 ++-
 .../test/resources/test-documents/example.arc.gz   | Bin 0 -> 1027 bytes
 .../src/test/resources/test-documents/testARC.arc  |  50 ++++
 .../apache/tika/parser/xml/MetadataHandler.java    |   4 +
 .../tika/detect/TestContainerAwareDetector.java    |   5 +
 .../java/org/apache/tika/mime/TestMimeTypes.java   |   6 +
 .../tika/parser/RecursiveParserWrapperTest.java    |   5 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |   9 +
 .../tika/parser/ocr/TesseractOCRParserTest.java    |   9 +
 .../apache/tika/parser/pkg/Seven7ParserTest.java   |  12 +-
 .../resources/configs/tika-config-no-names.xml     |   2 +-
 .../resources/configs/tika-config-with-names.xml   |   2 +-
 .../src/test/resources/test-documents/test3mf.3mf  | Bin 0 -> 28243 bytes
 .../resources/test-documents/testSTL-ascii.stl     |  16 ++
 .../resources/test-documents/testSTL-binary.stl    | Bin 0 -> 160 bytes
 tika-pipes/tika-async-cli/pom.xml                  |   7 +
 .../apache/tika/async/cli/AsyncProcessorTest.java  | 140 ++++++++++
 .../apache/tika/async/cli/TikaAsyncCLITest.java    |   2 +-
 .../test/resources/configs/TIKA-4207-emitter.xml   |  28 +-
 .../resources/{ => configs}/tika-config-broken.xml |   0
 .../basic_embedded.xml}                            |  29 +-
 tika-pipes/tika-pipes-iterators/pom.xml            |   1 +
 .../tika-pipes-iterator-json}/pom.xml              |  43 ++-
 .../pipesiterator/json/JsonPipesIterator.java      |  65 +++++
 .../pipesiterator/json/TestJsonPipesIterator.java  |  85 ++++++
 .../test-documents/test-with-embedded-bytes.json   | 100 +++++++
 .../src/test/resources/test-documents/test.json    | 100 +++++++
 .../pipes/reporters/jdbc/JDBCPipesReporter.java    |  52 ++--
 .../metadata/serialization/JsonFetchEmitTuple.java |  71 ++++-
 .../serialization/JsonFetchEmitTupleTest.java      |  20 ++
 tika-server/tika-server-core/pom.xml               |  10 +-
 .../apache/tika/server/core/TikaServerProcess.java |   2 +-
 .../tika/server/core/resource/AsyncResource.java   |  32 ++-
 .../tika/server/core/resource/TikaResource.java    |   2 +-
 .../apache/tika/server/core/TikaVersionTest.java   |   2 +-
 .../apache/tika/server/core/TikaWelcomeTest.java   |   4 +-
 .../apache/tika/server/standard/TikaPipesTest.java |  93 +++++++
 tika-translate/pom.xml                             |   1 +
 tika-xmp/pom.xml                                   |   1 +
 123 files changed, 3290 insertions(+), 686 deletions(-)