You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/24 18:26:58 UTC

[tika] branch TIKA-3304 updated (26ff633 -> 5dbffcd)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3304
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 26ff633  WIP -- do not merge...still a bunch to do
     add e47f625  TIKA-3309 Add convenience constructors to RereadableInputStream (#408)
     add 7f684d7  fix setter in ByteFlipper
     add c11cab6  upgrade jackcess
     add 8028a00  improve robustness of image processing in PDFs
     add 3096f3f  fix unit test to handle counts w and w/out tesseract
     add cba0372  TIKA-3316 -- improve XPS parser to include open XPS and allow for streaming zips with data descriptors
     add 31da853  TIKA-3318 MP3 parser should output the xmpDM:duration metadata as seconds not milliseconds
     add 356cf44  TIKA-3318 Document the units of xmpDM:duration as seconds by default
     add d80dc36  TIKA-3310 Check if MP4 file's compatible brands match any of the expected values
     add 187fd47  TIKA-3310 Check major brand before checking compatible brands
     add 4551f7d  Separate search for major brand and compatible brands
     add 4bd931d  Merge pull request #410 from peterkronenberg/main
     add 06769d3  Added case-insensitivity to tika server ocr header names (#414)
     add 1bdbc56  Update CHANGES.txt
     add fa5612a  TIKA-3324 -- add checkstyle plugin -- fail on build for tika-core only as a start
     add 87f05de  TIKA-3313 Improve performance and usability of RereadableInputStream (#413)
     add 01dca21  Minor cleaning and added missing javadoc on TikaServer (#415)
     add 9905db9  Merge remote-tracking branch 'origin/main' into main
     add 0e64563  TIKA-3324 -- add checkstyle plugin -- fix merge
     add 42b719b  TIKA-3324 -- add checkstyle plugin -- fix merge, again... :(
     add 4428958  TIKA-3323 -- allow flexibility for 'file' command output on different operating systems.
     add ba9bcb2  TIKA-3316 -- fix for slightly different behavior of RereadableInputStream
     add d93ba62  TIKA-3324 -- code cleanup for checkstyle in tika-parsers-classic
     add f58a27c  TIKA-3325 -- writeLimit is now calculated on the full file (container and embedded documents), no longer on each.
     add 20eae4f  TIKA-3331 -- throw a more informative exception for an encrypted odt file
     add 1766166  TIKA-3322 -- upgrade PDFBox to 2.0.23
     add 33a4f42  clean up dependencies
     add 29ef4b5  TIKA-3332 -- recursively search embedded file tree for attachments
     add 5da9984  TIKA-3332 -- checkstyle fix
     add 0beb61a  TIKA-3324 -- add checkstyle enforcement to the tika-server module
     add ea359c9  TIKA-3324 -- add checkstyle enforcement to tika-parsers and submodules.
     add 769938f  [TIKA-3311] Add github workflows to Tika
     add 41a99cc  Merge pull request #407 from lewismc/TIKA-3311
     add 667a310  TIKA-3334 -- fix thread safety in OpenDocumentParser
     add de6cf73  TIKA-3336 -- don't doubly advance...prevent new zip bomb warnings in regression tests for 1.26 release
     add ac05932  TIKA-3335 -- invalid xml during encryption check shouldn't cause the parse to fail
     add d87ac65  checkstyle fixes
     new 9416f59  Merge remote-tracking branch 'origin/main' into TIKA-3304
     new 5dbffcd  merge from main and required updates/conflict resolution

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .github/pull_request_template.md                   |   12 +
 .../workflows/main-build.yml                       |   32 +-
 .gitignore                                         |    1 +
 CHANGES.txt                                        |   25 +-
 tika-core/pom.xml                                  |   30 +
 tika-core/src/main/java/org/apache/tika/Tika.java  |  143 +-
 .../concurrent/ConfigurableThreadPoolExecutor.java |   64 +-
 .../tika/concurrent/SimpleThreadPoolExecutor.java  |   82 +-
 .../main/java/org/apache/tika/config/Field.java    |    4 +-
 .../java/org/apache/tika/config/Initializable.java |   11 +-
 .../tika/config/InitializableProblemHandler.java   |   19 +-
 .../org/apache/tika/config/LoadErrorHandler.java   |   29 +-
 .../main/java/org/apache/tika/config/Param.java    |  261 +-
 .../java/org/apache/tika/config/ParamField.java    |   56 +-
 .../java/org/apache/tika/config/ServiceLoader.java |  212 +-
 .../java/org/apache/tika/config/TikaActivator.java |    5 +-
 .../java/org/apache/tika/config/TikaConfig.java    |  701 +++---
 .../apache/tika/config/TikaConfigSerializer.java   |  119 +-
 .../org/apache/tika/detect/AutoDetectReader.java   |  100 +-
 .../org/apache/tika/detect/CompositeDetector.java  |   34 +-
 .../tika/detect/CompositeEncodingDetector.java     |   22 +-
 .../org/apache/tika/detect/DefaultDetector.java    |   83 +-
 .../tika/detect/DefaultEncodingDetector.java       |   13 +-
 .../apache/tika/detect/DefaultProbDetector.java    |   32 +-
 .../main/java/org/apache/tika/detect/Detector.java |    2 +-
 .../java/org/apache/tika/detect/EmptyDetector.java |    3 +-
 .../org/apache/tika/detect/EncodingDetector.java   |    2 +-
 .../apache/tika/detect/FileCommandDetector.java    |   58 +-
 .../java/org/apache/tika/detect/MagicDetector.java |  357 ++-
 .../apache/tika/detect/NNExampleModelDetector.java |   19 +-
 .../org/apache/tika/detect/NNTrainedModel.java     |  147 +-
 .../apache/tika/detect/NNTrainedModelBuilder.java  |   77 +-
 .../java/org/apache/tika/detect/NameDetector.java  |    6 +-
 .../tika/detect/NonDetectingEncodingDetector.java  |    8 +-
 .../org/apache/tika/detect/OverrideDetector.java   |    9 +-
 .../java/org/apache/tika/detect/TextDetector.java  |   15 +-
 .../org/apache/tika/detect/TextStatistics.java     |   27 +-
 .../java/org/apache/tika/detect/TrainedModel.java  |    7 +-
 .../apache/tika/detect/TrainedModelDetector.java   |   22 +-
 .../java/org/apache/tika/detect/TypeDetector.java  |    2 +-
 .../org/apache/tika/detect/XmlRootExtractor.java   |   16 +-
 .../apache/tika/detect/ZeroSizeFileDetector.java   |   13 +-
 .../java/org/apache/tika/embedder/Embedder.java    |   37 +-
 .../org/apache/tika/embedder/ExternalEmbedder.java |  243 +-
 .../tika/exception/EncryptedDocumentException.java |    2 +-
 .../apache/tika/exception/TikaConfigException.java |    1 +
 .../org/apache/tika/exception/TikaException.java   |    2 +-
 .../tika/exception/TikaMemoryLimitException.java   |    2 +-
 .../{package-info.java => WriteLimitReached.java}  |   10 +-
 .../tika/exception/ZeroByteFileException.java      |   15 +-
 .../apache/tika/extractor/ContainerExtractor.java  |   23 +-
 .../extractor/DefaultEmbeddedStreamTranslator.java |    8 +-
 .../apache/tika/extractor/DocumentSelector.java    |    2 +-
 .../tika/extractor/EmbeddedDocumentExtractor.java  |    7 +-
 .../tika/extractor/EmbeddedDocumentUtil.java       |   24 +-
 .../tika/extractor/EmbeddedStreamTranslator.java   |    4 +-
 .../tika/extractor/ParserContainerExtractor.java   |    8 +-
 .../ParsingEmbeddedDocumentExtractor.java          |   28 +-
 .../org/apache/tika/fork/ClassLoaderProxy.java     |   10 +-
 .../org/apache/tika/fork/ClassLoaderResource.java  |    7 +-
 .../org/apache/tika/fork/ContentHandlerProxy.java  |   54 +-
 .../apache/tika/fork/ContentHandlerResource.java   |   14 +-
 .../main/java/org/apache/tika/fork/ForkClient.java |  184 +-
 .../apache/tika/fork/ForkObjectInputStream.java    |   43 +-
 .../main/java/org/apache/tika/fork/ForkParser.java |  162 +-
 .../java/org/apache/tika/fork/ForkResource.java    |    4 +-
 .../main/java/org/apache/tika/fork/ForkServer.java |  126 +-
 .../org/apache/tika/fork/InputStreamProxy.java     |    4 +-
 .../org/apache/tika/fork/InputStreamResource.java  |    3 +-
 .../org/apache/tika/fork/MemoryURLConnection.java  |    2 +-
 .../apache/tika/fork/MemoryURLStreamHandler.java   |    4 +-
 .../tika/fork/MemoryURLStreamHandlerFactory.java   |    2 +-
 .../apache/tika/fork/MemoryURLStreamRecord.java    |    2 +-
 .../apache/tika/fork/MetadataContentHandler.java   |    6 +-
 .../org/apache/tika/fork/ParserFactoryFactory.java |   13 +-
 .../fork/RecursiveMetadataContentHandlerProxy.java |   39 +-
 .../RecursiveMetadataContentHandlerResource.java   |   40 +-
 .../org/apache/tika/io/BoundedInputStream.java     |    4 +-
 .../main/java/org/apache/tika/io/EndianUtils.java  |   42 +-
 .../java/org/apache/tika/io/FilenameUtils.java     |   30 +-
 .../src/main/java/org/apache/tika/io/IOUtils.java  |   18 +-
 .../org/apache/tika/io/InputStreamFactory.java     |   17 +-
 .../org/apache/tika/io/LookaheadInputStream.java   |   10 +-
 .../org/apache/tika/io/MappedBufferCleaner.java    |   96 +-
 .../main/java/org/apache/tika/io/TailStream.java   |  141 +-
 .../org/apache/tika/io/TemporaryResources.java     |    8 +-
 .../java/org/apache/tika/io/TikaInputStream.java   |  314 ++-
 .../apache/tika/language/LanguageIdentifier.java   |  165 +-
 .../org/apache/tika/language/LanguageProfile.java  |  106 +-
 .../tika/language/LanguageProfilerBuilder.java     |  496 ++--
 .../org/apache/tika/language/ProfilingHandler.java |    3 +-
 .../org/apache/tika/language/ProfilingWriter.java  |    2 +-
 .../tika/language/detect/LanguageConfidence.java   |    5 +-
 .../tika/language/detect/LanguageDetector.java     |  342 +--
 .../tika/language/detect/LanguageHandler.java      |   10 +-
 .../apache/tika/language/detect/LanguageNames.java |  111 +-
 .../tika/language/detect/LanguageResult.java       |  158 +-
 .../tika/language/detect/LanguageWriter.java       |   10 +-
 .../tika/language/translate/DefaultTranslator.java |   14 +-
 .../tika/language/translate/EmptyTranslator.java   |    2 +-
 .../apache/tika/language/translate/Translator.java |   22 +-
 .../apache/tika/metadata/AccessPermissions.java    |   24 +-
 .../org/apache/tika/metadata/ClimateForcast.java   |   30 +-
 .../org/apache/tika/metadata/CreativeCommons.java  |    2 +-
 .../java/org/apache/tika/metadata/Database.java    |   14 +-
 .../java/org/apache/tika/metadata/DublinCore.java  |   76 +-
 .../main/java/org/apache/tika/metadata/Font.java   |    4 +-
 .../java/org/apache/tika/metadata/Geographic.java  |   15 +-
 .../main/java/org/apache/tika/metadata/HTML.java   |    6 +-
 .../java/org/apache/tika/metadata/HttpHeaders.java |    4 +-
 .../main/java/org/apache/tika/metadata/IPTC.java   | 2527 ++++++++++----------
 .../org/apache/tika/metadata/MachineMetadata.java  |  143 +-
 .../java/org/apache/tika/metadata/Message.java     |   38 +-
 .../java/org/apache/tika/metadata/Metadata.java    |  241 +-
 .../main/java/org/apache/tika/metadata/Office.java |  211 +-
 .../apache/tika/metadata/OfficeOpenXMLCore.java    |   52 +-
 .../tika/metadata/OfficeOpenXMLExtended.java       |   65 +-
 .../main/java/org/apache/tika/metadata/PDF.java    |   76 +-
 .../java/org/apache/tika/metadata/PagedText.java   |    4 +-
 .../java/org/apache/tika/metadata/Photoshop.java   |   31 +-
 .../java/org/apache/tika/metadata/Property.java    |  265 +-
 .../tika/metadata/PropertyTypeException.java       |    8 +-
 .../java/org/apache/tika/metadata/QuattroPro.java  |   66 +-
 .../java/org/apache/tika/metadata/RTFMetadata.java |   45 +-
 .../main/java/org/apache/tika/metadata/TIFF.java   |  100 +-
 .../apache/tika/metadata/TikaCoreProperties.java   |  337 ++-
 .../java/org/apache/tika/metadata/WordPerfect.java |  100 +-
 .../main/java/org/apache/tika/metadata/XMP.java    |    4 +-
 .../main/java/org/apache/tika/metadata/XMPDM.java  |  290 +--
 .../main/java/org/apache/tika/metadata/XMPIdq.java |    4 +-
 .../main/java/org/apache/tika/metadata/XMPMM.java  |   42 +-
 .../java/org/apache/tika/metadata/XMPRights.java   |   20 +-
 .../metadata/filter/ClearByMimeMetadataFilter.java |   13 +-
 .../metadata/filter/CompositeMetadataFilter.java   |    4 +-
 .../metadata/filter/DefaultMetadataFilter.java     |   20 +-
 .../filter/ExcludeFieldMetadataFilter.java         |   10 +-
 .../metadata/filter/FieldNameMappingFilter.java    |   25 +-
 .../filter/IncludeFieldMetadataFilter.java         |   11 +-
 .../tika/metadata/filter/MetadataFilter.java       |    4 +-
 .../main/java/org/apache/tika/mime/HexCoDec.java   |   49 +-
 .../src/main/java/org/apache/tika/mime/Magic.java  |    2 -
 .../main/java/org/apache/tika/mime/MagicMatch.java |   12 +-
 .../main/java/org/apache/tika/mime/MediaType.java  |  273 +--
 .../org/apache/tika/mime/MediaTypeRegistry.java    |   67 +-
 .../main/java/org/apache/tika/mime/MimeType.java   |  283 ++-
 .../org/apache/tika/mime/MimeTypeException.java    |    6 +-
 .../main/java/org/apache/tika/mime/MimeTypes.java  |  247 +-
 .../org/apache/tika/mime/MimeTypesFactory.java     |  108 +-
 .../java/org/apache/tika/mime/MimeTypesReader.java |  346 +--
 .../apache/tika/mime/MimeTypesReaderMetKeys.java   |    2 +-
 .../org/apache/tika/mime/MinShouldMatchClause.java |   11 +-
 .../main/java/org/apache/tika/mime/Patterns.java   |   99 +-
 .../mime/ProbabilisticMimeDetectionSelector.java   |   93 +-
 .../parser/AbstractEncodingDetectorParser.java     |    1 +
 .../org/apache/tika/parser/AbstractParser.java     |   11 +-
 .../org/apache/tika/parser/AutoDetectParser.java   |   33 +-
 .../tika/parser/AutoDetectParserFactory.java       |    9 +-
 .../org/apache/tika/parser/CompositeParser.java    |   93 +-
 .../java/org/apache/tika/parser/CryptoParser.java  |   26 +-
 .../java/org/apache/tika/parser/DefaultParser.java |   88 +-
 .../org/apache/tika/parser/DelegatingParser.java   |   13 +-
 .../org/apache/tika/parser/DigestingParser.java    |   63 +-
 .../java/org/apache/tika/parser/EmptyParser.java   |   20 +-
 .../java/org/apache/tika/parser/ErrorParser.java   |   12 +-
 .../java/org/apache/tika/parser/NetworkParser.java |   66 +-
 .../java/org/apache/tika/parser/ParseContext.java  |   67 +-
 .../main/java/org/apache/tika/parser/Parser.java   |   23 +-
 .../org/apache/tika/parser/ParserDecorator.java    |   84 +-
 .../java/org/apache/tika/parser/ParserFactory.java |    7 +-
 .../apache/tika/parser/ParserPostProcessor.java    |   11 +-
 .../java/org/apache/tika/parser/ParsingReader.java |  130 +-
 .../org/apache/tika/parser/PasswordProvider.java   |   11 +-
 .../apache/tika/parser/RecursiveParserWrapper.java |  177 +-
 .../org/apache/tika/parser/StatefulParser.java     |    2 +-
 .../tika/parser/digest/CompositeDigester.java      |    2 +-
 .../tika/parser/digest/InputStreamDigester.java    |   64 +-
 .../parser/external/CompositeExternalParser.java   |   25 +-
 .../tika/parser/external/ExternalParser.java       |  293 ++-
 .../external/ExternalParsersConfigReader.java      |  335 ++-
 .../ExternalParsersConfigReaderMetKeys.java        |   14 +-
 .../parser/external/ExternalParsersFactory.java    |   93 +-
 .../parser/multiple/AbstractMultipleParser.java    |  376 +--
 .../tika/parser/multiple/FallbackParser.java       |   36 +-
 .../tika/parser/multiple/SupplementingParser.java  |   50 +-
 .../apache/tika/pipes/emitter/AbstractEmitter.java |   38 +-
 .../org/apache/tika/pipes/emitter/EmitData.java    |   25 +-
 .../org/apache/tika/pipes/emitter/EmitKey.java     |   18 +-
 .../org/apache/tika/pipes/emitter/Emitter.java     |    4 +-
 .../apache/tika/pipes/emitter/EmitterManager.java  |   16 +-
 .../apache/tika/pipes/emitter/EmptyEmitter.java    |    7 +-
 .../apache/tika/pipes/emitter/StreamEmitter.java   |    4 +-
 .../apache/tika/pipes/fetcher/EmptyFetcher.java    |    6 +-
 .../org/apache/tika/pipes/fetcher/FetchKey.java    |   22 +-
 .../org/apache/tika/pipes/fetcher/Fetcher.java     |   10 +-
 .../apache/tika/pipes/fetcher/FetcherManager.java  |   18 +-
 .../tika/pipes/fetcher/FileSystemFetcher.java      |   50 +-
 .../pipes/fetchiterator/EmptyFetchIterator.java    |    6 -
 .../tika/pipes/fetchiterator/FetchEmitTuple.java   |   38 +-
 .../tika/pipes/fetchiterator/FetchIterator.java    |   62 +-
 .../fetchiterator/FileSystemFetchIterator.java     |   41 +-
 .../sax/AbstractRecursiveParserWrapperHandler.java |   53 +-
 .../tika/sax/BasicContentHandlerFactory.java       |   98 +-
 .../org/apache/tika/sax/BodyContentHandler.java    |   12 +-
 .../java/org/apache/tika/sax/CleanPhoneText.java   |  345 +--
 .../apache/tika/sax/ContentHandlerDecorator.java   |   18 +-
 .../org/apache/tika/sax/ContentHandlerFactory.java |   15 +-
 .../org/apache/tika/sax/DIFContentHandler.java     |  242 +-
 .../tika/sax/ElementMappingContentHandler.java     |   81 +-
 .../sax/EndDocumentShieldingContentHandler.java    |   16 +-
 .../tika/sax/ExpandedTitleContentHandler.java      |   22 +-
 .../src/main/java/org/apache/tika/sax/Link.java    |    4 +-
 .../main/java/org/apache/tika/sax/LinkBuilder.java |   12 +-
 .../org/apache/tika/sax/LinkContentHandler.java    |   30 +-
 .../tika/sax/PhoneExtractingContentHandler.java    |   20 +-
 .../tika/sax/RecursiveParserWrapperHandler.java    |   54 +-
 .../apache/tika/sax/RichTextContentHandler.java    |    3 +-
 .../org/apache/tika/sax/SafeContentHandler.java    |  155 +-
 .../org/apache/tika/sax/SecureContentHandler.java  |   86 +-
 .../org/apache/tika/sax/StandardOrganizations.java |  305 +--
 .../org/apache/tika/sax/StandardReference.java     |  201 +-
 .../sax/StandardsExtractingContentHandler.java     |  155 +-
 .../java/org/apache/tika/sax/StandardsText.java    |  277 +--
 .../org/apache/tika/sax/TaggedContentHandler.java  |    8 +-
 .../org/apache/tika/sax/TaggedSAXException.java    |    6 +-
 .../org/apache/tika/sax/TeeContentHandler.java     |   18 +-
 .../tika/sax/TextAndAttributeContentHandler.java   |   12 +-
 .../org/apache/tika/sax/TextContentHandler.java    |   12 +-
 .../org/apache/tika/sax/ToHTMLContentHandler.java  |   10 +-
 .../org/apache/tika/sax/ToTextContentHandler.java  |   30 +-
 .../org/apache/tika/sax/ToXMLContentHandler.java   |  120 +-
 .../apache/tika/sax/WriteOutContentHandler.java    |   52 +-
 .../org/apache/tika/sax/XHTMLContentHandler.java   |  103 +-
 .../org/apache/tika/sax/XMPContentHandler.java     |   27 +-
 .../apache/tika/sax/xpath/CompositeMatcher.java    |    3 +-
 .../java/org/apache/tika/sax/xpath/Matcher.java    |    4 +-
 .../tika/sax/xpath/MatchingContentHandler.java     |   21 +-
 .../org/apache/tika/sax/xpath/XPathParser.java     |   10 +-
 .../org/apache/tika/utils/AnnotationUtils.java     |   61 +-
 .../java/org/apache/tika/utils/CharsetUtils.java   |   97 +-
 .../java/org/apache/tika/utils/CompareUtils.java   |   10 +-
 .../org/apache/tika/utils/ConcurrentUtils.java     |  112 +-
 .../main/java/org/apache/tika/utils/DateUtils.java |   90 +-
 .../java/org/apache/tika/utils/ExceptionUtils.java |    5 +-
 .../java/org/apache/tika/utils/ParserUtils.java    |   60 +-
 .../java/org/apache/tika/utils/ProcessUtils.java   |    2 +-
 .../java/org/apache/tika/utils/RegexUtils.java     |   20 +-
 .../apache/tika/utils/RereadableInputStream.java   |  261 +-
 .../org/apache/tika/utils/ServiceLoaderUtils.java  |   12 +-
 .../java/org/apache/tika/utils/StringUtils.java    |   26 +-
 .../java/org/apache/tika/utils/SystemUtils.java    |   15 +-
 .../java/org/apache/tika/utils/XMLReaderUtils.java |  545 +++--
 .../org/apache/custom/detect/MyCustomDetector.java |    6 +-
 .../org/apache/tika/MultiThreadedTikaTest.java     |  332 +--
 .../apache/tika/ResourceLoggingClassLoader.java    |   24 +-
 .../org/apache/tika/TestRereadableInputStream.java |  144 +-
 .../java/org/apache/tika/TikaDetectionTest.java    |   51 +-
 .../src/test/java/org/apache/tika/TikaIT.java      |    5 +-
 .../src/test/java/org/apache/tika/TikaTest.java    |  444 ++--
 .../org/apache/tika/TypeDetectionBenchmark.java    |   18 +-
 .../apache/tika/config/AbstractTikaConfigTest.java |   14 +-
 .../java/org/apache/tika/config/DummyExecutor.java |   59 +-
 .../java/org/apache/tika/config/DummyParser.java   |    8 +-
 .../java/org/apache/tika/config/ParamTest.java     |   37 +-
 .../tika/config/TikaConfigSerializerTest.java      |   24 +-
 .../org/apache/tika/config/TikaConfigTest.java     |  163 +-
 .../tika/detect/FileCommandDetectorTest.java       |   40 +-
 .../org/apache/tika/detect/MagicDetectorTest.java  |  143 +-
 .../tika/detect/MimeDetectionWithNNTest.java       |  213 +-
 .../org/apache/tika/detect/NameDetectorTest.java   |   25 +-
 .../org/apache/tika/detect/TextDetectorTest.java   |   31 +-
 .../org/apache/tika/detect/TypeDetectorTest.java   |   31 +-
 .../tika/detect/ZeroSizeFileDetectorTest.java      |    5 +-
 .../java/org/apache/tika/fork/ForkParserTest.java  |  123 +-
 .../apache/tika/fork/ForkParserTikaBinTest.java    |  135 +-
 .../java/org/apache/tika/fork/ForkTestParser.java  |   21 +-
 .../tika/fork/UpperCasingContentHandler.java       |    7 +-
 .../java/org/apache/tika/io/EndianUtilsTest.java   |   38 +-
 .../java/org/apache/tika/io/FilenameUtilsTest.java |   39 +-
 .../apache/tika/io/LookaheadInputStreamTest.java   |   20 +-
 .../java/org/apache/tika/io/TailStreamTest.java    |   87 +-
 .../org/apache/tika/io/TemporaryResourcesTest.java |    6 +-
 .../org/apache/tika/io/TikaInputStreamTest.java    |   56 +-
 .../tika/language/LanguageIdentifierTest.java      |   44 +-
 .../apache/tika/language/LanguageProfileTest.java  |    2 +-
 .../tika/language/LanguageProfilerBuilderTest.java |   32 +-
 .../tika/language/detect/LanguageNamesTest.java    |   26 +-
 .../org/apache/tika/metadata/TestMetadata.java     |  212 +-
 .../tika/metadata/filter/MockUpperCaseFilter.java  |    4 +-
 .../tika/metadata/filter/TestMetadataFilter.java   |   31 +-
 .../org/apache/tika/mime/CustomReaderTest.java     |  120 +-
 .../java/org/apache/tika/mime/MediaTypeTest.java   |   86 +-
 .../org/apache/tika/mime/MimeDetectionTest.java    |  135 +-
 .../org/apache/tika/mime/MimeTypesReaderTest.java  |  297 ++-
 .../java/org/apache/tika/mime/PatternsTest.java    |   18 +-
 .../tika/mime/ProbabilisticMimeDetectionTest.java  |  114 +-
 .../ProbabilisticMimeDetectionTestWithTika.java    |  100 +-
 .../apache/tika/parser/CompositeParserTest.java    |  158 +-
 .../tika/parser/DummyInitializableParser.java      |   29 +-
 .../tika/parser/DummyParameterizedParser.java      |   71 +-
 .../java/org/apache/tika/parser/DummyParser.java   |   57 +-
 .../tika/parser/InitializableParserTest.java       |   14 +-
 .../tika/parser/ParameterizedParserTest.java       |   35 +-
 .../apache/tika/parser/ParserDecoratorTest.java    |   57 +-
 .../org/apache/tika/parser/mock/MockParser.java    |   72 +-
 .../apache/tika/parser/mock/MockParserFactory.java |    8 +-
 .../org/apache/tika/parser/mock/VowelParser.java   |   11 +-
 .../tika/parser/multiple/MultipleParserTest.java   |  137 +-
 .../org/apache/tika/pipes/emitter/MockEmitter.java |    8 +-
 .../tika/pipes/fetcher/FileSystemFetcherTest.java  |   10 +-
 .../fetchiterator/FileSystemFetchIteratorTest.java |   25 +-
 .../tika/sax/BasicContentHandlerFactoryTest.java   |   98 +-
 .../apache/tika/sax/BodyContentHandlerTest.java    |    9 +-
 .../apache/tika/sax/LinkContentHandlerTest.java    |   29 +-
 .../apache/tika/sax/OfflineContentHandlerTest.java |    9 +-
 .../tika/sax/RichTextContentHandlerTest.java       |   15 +-
 .../apache/tika/sax/SecureContentHandlerTest.java  |    7 +-
 .../java/org/apache/tika/sax/SerializerTest.java   |   55 +-
 .../apache/tika/sax/XHTMLContentHandlerTest.java   |   77 +-
 .../org/apache/tika/utils/AnnotationUtilsTest.java |   47 +-
 .../org/apache/tika/utils/CharsetUtilsTest.java    |   14 +-
 .../org/apache/tika/utils/ConcurrentUtilsTest.java |  126 +-
 .../java/org/apache/tika/utils/RegexUtilsTest.java |   31 +-
 .../apache/tika/utils/ServiceLoaderUtilsTest.java  |   28 +-
 tika-core/src/test/resources/log4j.properties      |    1 +
 .../org/apache/tika/config/FileCommandDetector.xml |    2 +-
 .../org/apache/tika/config/TIKA-1762-executors.xml |   62 +-
 .../apache/tika/fuzzing/general/ByteFlipper.java   |    2 +-
 tika-parent/checkstyle.xml                         |  139 ++
 tika-parent/pom.xml                                |   42 +-
 tika-parsers/pom.xml                               |   38 +-
 .../tika/parser/recognition/AgeRecogniser.java     |  182 +-
 .../parser/recognition/AgeRecogniserConfig.java    |   59 +-
 .../tika/parser/recognition/AgeRecogniserTest.java |   53 +-
 .../tika/dl/imagerec/DL4JInceptionV3Net.java       |  138 +-
 .../org/apache/tika/dl/imagerec/DL4JVGG16Net.java  |   73 +-
 .../tika/dl/imagerec/DL4JInceptionV3NetTest.java   |   14 +-
 .../apache/tika/dl/imagerec/DL4JVGG16NetTest.java  |   15 +-
 .../tika/parser/captioning/CaptionObject.java      |    6 +-
 .../captioning/tf/TensorflowRESTCaptioner.java     |   44 +-
 .../tika/parser/pot/PooledTimeSeriesParser.java    |   85 +-
 .../tika/parser/recognition/ObjectRecogniser.java  |   36 +-
 .../recognition/ObjectRecognitionParser.java       |   50 +-
 .../tika/parser/recognition/RecognisedObject.java  |    7 +-
 .../recognition/tf/TensorflowImageRecParser.java   |   97 +-
 .../recognition/tf/TensorflowRESTRecogniser.java   |   54 +-
 .../tf/TensorflowRESTVideoRecogniser.java          |   30 +-
 .../tika/parser/captioning/tf/model_info.xml       |    3 +-
 .../recognition/tika-config-tflow-video-rest.xml   |    3 +-
 .../recognition/ObjectRecognitionParserTest.java   |   96 +-
 .../tf/TensorflowImageRecParserTest.java           |   28 +-
 .../tf/TensorflowVideoRecParserTest.java           |   30 +-
 .../parser/ctakes/CTAKESAnnotationProperty.java    |   16 +-
 .../apache/tika/parser/ctakes/CTAKESConfig.java    |  249 +-
 .../tika/parser/ctakes/CTAKESContentHandler.java   |  178 +-
 .../apache/tika/parser/ctakes/CTAKESParser.java    |   42 +-
 .../tika/parser/ctakes/CTAKESSerializer.java       |    5 +-
 .../org/apache/tika/parser/ctakes/CTAKESUtils.java |  423 ++--
 .../java/org/apache/tika/parser/geo/GeoParser.java |   85 +-
 .../apache/tika/parser/geo/GeoParserConfig.java    |   33 +-
 .../java/org/apache/tika/parser/geo/GeoTag.java    |   73 +-
 .../tika/parser/geo/NameEntityExtractor.java       |   23 +-
 .../parser/geo/gazetteer/GeoGazetteerClient.java   |  157 +-
 .../apache/tika/parser/geo/gazetteer/Location.java |  107 +-
 .../tika/parser/journal/GrobidRESTParser.java      |   62 +-
 .../apache/tika/parser/journal/JournalParser.java  |   44 +-
 .../apache/tika/parser/journal/TEIDOMParser.java   |  159 +-
 .../org/apache/tika/parser/ner/NERecogniser.java   |    8 +-
 .../apache/tika/parser/ner/NamedEntityParser.java  |   76 +-
 .../parser/ner/corenlp/CoreNLPNERecogniser.java    |   93 +-
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |  208 +-
 .../tika/parser/ner/mitie/MITIENERecogniser.java   |  115 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |   69 +-
 .../parser/ner/opennlp/OpenNLPNERecogniser.java    |   42 +-
 .../tika/parser/ner/opennlp/OpenNLPNameFinder.java |   37 +-
 .../tika/parser/ner/regex/RegexNERecogniser.java   |   31 +-
 .../parser/sentiment/SentimentAnalysisParser.java  |   33 +-
 .../tika/parser/ctakes/CTAKESConfig.properties     |    2 +-
 .../tika/parser/geo/GeoTopicConfig.properties      |    2 +-
 .../tika/parser/journal/GrobidExtractor.properties |    2 +-
 .../tika/parser/ner/grobid/GrobidServer.properties |    4 +-
 .../tika/parser/ner/nltk/NLTKServer.properties     |    2 +-
 .../org/apache/tika/parser/geo/GeoParserTest.java  |  147 +-
 .../tika/parser/journal/JournalParserTest.java     |    5 +-
 .../org/apache/tika/parser/journal/TEITest.java    |   32 +-
 .../tika/parser/ner/NamedEntityParserTest.java     |   29 +-
 .../tika/parser/ner/nltk/NLTKNERecogniserTest.java |   20 +-
 .../parser/ner/regex/RegexNERecogniserTest.java    |   18 +-
 .../sentiment/SentimentAnalysisParserTest.java     |   49 +-
 .../tika/config/TIKA-3078-geo.topic.GeoParser.xml  |   22 +-
 tika-parsers/tika-parsers-classic/pom.xml          |   89 +-
 .../apache/tika/detect/apple/BPListDetector.java   |   66 +-
 .../apache/tika/detect/apple/IWorkDetector.java    |   18 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |   60 +-
 .../org/apache/tika/parser/apple/PListParser.java  |   92 +-
 .../tika/parser/iwork/AutoPageNumberUtils.java     |  146 +-
 .../tika/parser/iwork/IWorkPackageParser.java      |  271 ++-
 .../tika/parser/iwork/KeynoteContentHandler.java   |   36 +-
 .../tika/parser/iwork/NumbersContentHandler.java   |   16 +-
 .../tika/parser/iwork/PagesContentHandler.java     |  436 ++--
 .../parser/iwork/iwana/IWork13PackageParser.java   |  198 +-
 .../parser/iwork/iwana/IWork18PackageParser.java   |  180 +-
 .../apache/tika/parser/apple/PListParserTest.java  |   11 +-
 .../tika/parser/iwork/AutoPageNumberUtilsTest.java |   85 +-
 .../apache/tika/parser/iwork/IWorkParserTest.java  |  141 +-
 .../tika/parser/iwork/iwana/IWork13ParserTest.java |   23 +-
 .../org/apache/tika/parser/audio/AudioParser.java  |   32 +-
 .../org/apache/tika/parser/audio/MidiParser.java   |   41 +-
 .../org/apache/tika/parser/mp3/AudioFrame.java     |  239 +-
 .../java/org/apache/tika/parser/mp3/ID3Tags.java   |  294 +--
 .../org/apache/tika/parser/mp3/ID3v1Handler.java   |  103 +-
 .../org/apache/tika/parser/mp3/ID3v22Handler.java  |   71 +-
 .../org/apache/tika/parser/mp3/ID3v23Handler.java  |   31 +-
 .../org/apache/tika/parser/mp3/ID3v24Handler.java  |   35 +-
 .../org/apache/tika/parser/mp3/ID3v2Frame.java     |  418 ++--
 .../org/apache/tika/parser/mp3/LyricsHandler.java  |   82 +-
 .../java/org/apache/tika/parser/mp3/MP3Frame.java  |    2 +-
 .../java/org/apache/tika/parser/mp3/Mp3Parser.java |  210 +-
 .../org/apache/tika/parser/mp3/MpegStream.java     |  445 ++--
 .../apache/tika/parser/mp4/ISO6709Extractor.java   |   26 +-
 .../java/org/apache/tika/parser/mp4/MP4Parser.java |  172 +-
 .../org/apache/tika/parser/video/FLVParser.java    |   81 +-
 .../apache/tika/parser/audio/AudioParserTest.java  |   15 +-
 .../apache/tika/parser/audio/MidiParserTest.java   |    9 +-
 .../org/apache/tika/parser/mp3/Mp3ParserTest.java  |  151 +-
 .../org/apache/tika/parser/mp3/MpegStreamTest.java |   93 +-
 .../org/apache/tika/parser/mp4/MP4ParserTest.java  |   30 +-
 .../apache/tika/parser/video/FLVParserTest.java    |    7 +-
 .../java/org/apache/tika/parser/dwg/DWGParser.java |  336 ++-
 .../java/org/apache/tika/parser/prt/PRTParser.java |  413 ++--
 .../org/apache/tika/parser/dwg/DWGParserTest.java  |   99 +-
 .../org/apache/tika/parser/prt/PRTParserTest.java  |  135 +-
 .../org/apache/tika/parser/asm/ClassParser.java    |   17 +-
 .../apache/tika/parser/asm/XHTMLClassVisitor.java  |   45 +-
 .../apache/tika/parser/code/SourceCodeParser.java  |   36 +-
 .../tika/parser/executable/ExecutableParser.java   |  656 ++---
 .../java/org/apache/tika/parser/mat/MatParser.java |   44 +-
 .../org/apache/tika/parser/sas/SAS7BDATParser.java |   60 +-
 .../apache/tika/parser/asm/ClassParserTest.java    |   28 +-
 .../tika/parser/code/SourceCodeParserTest.java     |   56 +-
 .../parser/executable/ExecutableParserTest.java    |   32 +-
 .../org/apache/tika/parser/mat/MatParserTest.java  |    3 +-
 .../apache/tika/parser/sas/SAS7BDATParserTest.java |   37 +-
 .../org/apache/tika/parser/crypto/Pkcs7Parser.java |   41 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |  257 +-
 .../apache/tika/parser/crypto/Pkcs7ParserTest.java |    3 +-
 .../apache/tika/parser/crypto/TSDParserTest.java   |   16 +-
 .../parser/digestutils/BouncyCastleDigester.java   |   13 +-
 .../tika/parser/digestutils/CommonsDigester.java   |   69 +-
 .../tika/parser/font/AdobeFontMetricParser.java    |  167 +-
 .../apache/tika/parser/font/TrueTypeParser.java    |   27 +-
 .../apache/tika/parser/font/FontParsersTest.java   |   28 +-
 .../sax/boilerpipe/BoilerpipeContentHandler.java   |   46 +-
 .../org/apache/tika/parser/html/DataURIScheme.java |   13 +-
 .../parser/html/DataURISchemeParseException.java   |    4 +-
 .../apache/tika/parser/html/DataURISchemeUtil.java |   14 +-
 .../apache/tika/parser/html/DefaultHtmlMapper.java |  122 +-
 .../tika/parser/html/HtmlEncodingDetector.java     |   66 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |  104 +-
 .../org/apache/tika/parser/html/HtmlParser.java    |   83 +-
 .../tika/parser/html/XHTMLDowngradeHandler.java    |   20 +-
 .../html/charsetdetector/CharsetAliases.java       |   55 +-
 .../charsetdetector/CharsetDetectionResult.java    |   12 +-
 .../parser/html/charsetdetector/MetaProcessor.java |   18 +-
 .../parser/html/charsetdetector/PreScanner.java    |   83 +-
 .../StandardHtmlEncodingDetector.java              |   28 +-
 .../charsets/XUserDefinedCharset.java              |    8 +-
 .../tika/parser/html/DataURISchemeParserTest.java  |   19 +-
 .../tika/parser/html/HtmlEncodingDetectorTest.java |   60 +-
 .../apache/tika/parser/html/HtmlParserTest.java    |  705 +++---
 .../html/StandardHtmlEncodingDetectorTest.java     |  139 +-
 .../tika/parser/image/AbstractImageParser.java     |   46 +-
 .../org/apache/tika/parser/image/BPGParser.java    |   30 +-
 .../org/apache/tika/parser/image/HeifParser.java   |   33 +-
 .../org/apache/tika/parser/image/ICNSParser.java   |   55 +-
 .../org/apache/tika/parser/image/ICNSType.java     |  241 +-
 .../tika/parser/image/ImageMetadataExtractor.java  |  159 +-
 .../org/apache/tika/parser/image/ImageParser.java  |   63 +-
 .../org/apache/tika/parser/image/JpegParser.java   |   12 +-
 .../apache/tika/parser/image/MetadataFields.java   |    5 +-
 .../org/apache/tika/parser/image/PSDParser.java    |   43 +-
 .../org/apache/tika/parser/image/TiffParser.java   |   11 +-
 .../org/apache/tika/parser/image/WebPParser.java   |   11 +-
 .../apache/tika/parser/image/HeifParserTest.java   |   16 +-
 .../apache/tika/parser/image/ICNSParserTest.java   |   33 +-
 .../parser/image/ImageMetadataExtractorTest.java   |   17 +-
 .../apache/tika/parser/image/ImageParserTest.java  |  110 +-
 .../apache/tika/parser/image/JpegParserTest.java   |   76 +-
 .../apache/tika/parser/image/PSDParserTest.java    |   13 +-
 .../apache/tika/parser/image/WebPParserTest.java   |    3 +-
 .../apache/tika/parser/jdbc/AbstractDBParser.java  |   33 +-
 .../apache/tika/parser/jdbc/JDBCTableReader.java   |   79 +-
 .../apache/tika/parser/mailcommons/MailUtil.java   |    9 +-
 .../tika/parser/mailcommons/MailUtilTest.java      |   11 +-
 .../tika/parser/mail/MailContentHandler.java       |  207 +-
 .../org/apache/tika/parser/mail/RFC822Parser.java  |   30 +-
 .../org/apache/tika/parser/mbox/MboxParser.java    |   36 +-
 .../apache/tika/parser/mail/RFC822ParserTest.java  |  175 +-
 .../apache/tika/parser/mbox/MboxParserTest.java    |   25 +-
 .../detect/microsoft/POIFSContainerDetector.java   |   87 +-
 .../detect/microsoft/ooxml/OPCPackageDetector.java |  224 +-
 .../microsoft/MSEmbeddedStreamTranslator.java      |   24 +-
 .../tika/parser/microsoft/AbstractListManager.java |   23 +-
 .../parser/microsoft/AbstractOfficeParser.java     |   61 +-
 .../parser/microsoft/AbstractPOIFSExtractor.java   |   57 +-
 .../org/apache/tika/parser/microsoft/Cell.java     |    3 +-
 .../tika/parser/microsoft/CellDecorator.java       |    3 +-
 .../apache/tika/parser/microsoft/EMFParser.java    |   78 +-
 .../tika/parser/microsoft/ExcelExtractor.java      |  142 +-
 .../tika/parser/microsoft/FormattingUtils.java     |   23 +-
 .../tika/parser/microsoft/HSLFExtractor.java       |  124 +-
 .../tika/parser/microsoft/JackcessExtractor.java   |   81 +-
 .../tika/parser/microsoft/JackcessParser.java      |   33 +-
 .../apache/tika/parser/microsoft/LinkedCell.java   |    3 +-
 .../apache/tika/parser/microsoft/ListManager.java  |   33 +-
 .../tika/parser/microsoft/MSOwnerFileParser.java   |   37 +-
 .../apache/tika/parser/microsoft/NumberCell.java   |    3 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |  146 +-
 .../tika/parser/microsoft/OfficeParserConfig.java  |   69 +-
 .../tika/parser/microsoft/OldExcelParser.java      |   23 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |  361 ++-
 .../tika/parser/microsoft/SummaryExtractor.java    |   68 +-
 .../apache/tika/parser/microsoft/TNEFParser.java   |   44 +-
 .../org/apache/tika/parser/microsoft/TextCell.java |    3 +-
 .../parser/microsoft/TikaExcelDataFormatter.java   |   11 +-
 .../parser/microsoft/TikaExcelGeneralFormat.java   |    2 +-
 .../apache/tika/parser/microsoft/WMFParser.java    |   24 +-
 .../tika/parser/microsoft/WordExtractor.java       |   98 +-
 .../tika/parser/microsoft/chm/ChmAccessor.java     |   10 +-
 .../tika/parser/microsoft/chm/ChmAssert.java       |  139 +-
 .../tika/parser/microsoft/chm/ChmBlockInfo.java    |  103 +-
 .../tika/parser/microsoft/chm/ChmCommons.java      |  293 +--
 .../tika/parser/microsoft/chm/ChmConstants.java    |   54 +-
 .../microsoft/chm/ChmDirectoryListingSet.java      |  234 +-
 .../tika/parser/microsoft/chm/ChmExtractor.java    |  284 +--
 .../tika/parser/microsoft/chm/ChmItsfHeader.java   |  192 +-
 .../tika/parser/microsoft/chm/ChmItspHeader.java   |  271 +--
 .../tika/parser/microsoft/chm/ChmLzxBlock.java     |  455 ++--
 .../tika/parser/microsoft/chm/ChmLzxState.java     |  262 +-
 .../parser/microsoft/chm/ChmLzxcControlData.java   |  147 +-
 .../parser/microsoft/chm/ChmLzxcResetTable.java    |  129 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   39 +-
 .../tika/parser/microsoft/chm/ChmPmgiHeader.java   |   51 +-
 .../tika/parser/microsoft/chm/ChmPmglHeader.java   |   62 +-
 .../tika/parser/microsoft/chm/ChmSection.java      |   61 +-
 .../tika/parser/microsoft/chm/ChmWrapper.java      |   12 +-
 .../microsoft/chm/DirectoryListingEntry.java       |   41 +-
 .../tika/parser/microsoft/onenote/CompactID.java   |    9 +-
 .../tika/parser/microsoft/onenote/Error.java       |   11 +-
 .../parser/microsoft/onenote/ExtendedGUID.java     |   11 +-
 .../microsoft/onenote/FileChunkReference.java      |   20 +-
 .../tika/parser/microsoft/onenote/FileNode.java    |  102 +-
 .../microsoft/onenote/FileNodeListHeader.java      |   32 +-
 .../tika/parser/microsoft/onenote/FileNodePtr.java |    3 +-
 .../parser/microsoft/onenote/FileNodeUnion.java    |   33 +-
 .../microsoft/onenote/FndStructureConstants.java   |   44 +-
 .../apache/tika/parser/microsoft/onenote/GUID.java |   65 +-
 .../apache/tika/parser/microsoft/onenote/JCID.java |   38 +-
 .../microsoft/onenote/JCIDPropertySetTypeEnum.java |   76 +-
 .../onenote/ObjectDeclarationWithRefCount.java     |   27 +-
 .../onenote/ObjectDeclarationWithRefCountBody.java |    3 +-
 .../onenote/ObjectSpaceObjectPropSet.java          |   12 +-
 ...ctSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java |    6 +-
 .../onenote/OneNoteDirectFileResource.java         |    9 +-
 .../parser/microsoft/onenote/OneNoteDocument.java  |   13 +-
 .../parser/microsoft/onenote/OneNoteHeader.java    |   18 +-
 .../onenote/OneNoteLegacyDumpStrings.java          |   47 +-
 .../parser/microsoft/onenote/OneNoteParser.java    |  149 +-
 .../microsoft/onenote/OneNotePropertyEnum.java     |  208 +-
 .../microsoft/onenote/OneNotePropertyId.java       |    7 +-
 .../tika/parser/microsoft/onenote/OneNotePtr.java  |  515 ++--
 .../microsoft/onenote/OneNoteTreeWalker.java       |  215 +-
 .../onenote/OneNoteTreeWalkerOptions.java          |   14 +-
 .../parser/microsoft/onenote/PropertyIDType.java   |    7 +-
 .../tika/parser/microsoft/onenote/PropertySet.java |   37 +-
 .../parser/microsoft/onenote/PropertyValue.java    |   20 +-
 .../tika/parser/microsoft/onenote/Revision.java    |   23 +-
 .../microsoft/onenote/RootObjectReference.java     |    3 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |  168 +-
 .../parser/microsoft/ooxml/MetadataExtractor.java  |   79 +-
 .../parser/microsoft/ooxml/OOXMLExtractor.java     |   10 +-
 .../microsoft/ooxml/OOXMLExtractorFactory.java     |   97 +-
 .../tika/parser/microsoft/ooxml/OOXMLParser.java   |   60 +-
 .../microsoft/ooxml/OOXMLTikaBodyPartHandler.java  |   72 +-
 .../ooxml/OOXMLWordAndPowerPointTextHandler.java   |  110 +-
 .../ooxml/POIXMLTextExtractorDecorator.java        |    8 +-
 .../microsoft/ooxml/ParagraphProperties.java       |   18 +-
 .../tika/parser/microsoft/ooxml/RunProperties.java |    9 +-
 .../ooxml/SXSLFPowerPointExtractorDecorator.java   |  258 +-
 .../ooxml/SXWPFWordExtractorDecorator.java         |   93 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java    |   95 +-
 .../ooxml/XSSFBExcelExtractorDecorator.java        |   48 +-
 .../ooxml/XSSFExcelExtractorDecorator.java         |  181 +-
 .../parser/microsoft/ooxml/XWPFListManager.java    |   21 +-
 .../ooxml/XWPFWordExtractorDecorator.java          |  124 +-
 .../microsoft/ooxml/xps/XPSExtractorDecorator.java |  122 +-
 .../microsoft/ooxml/xps/XPSPageContentHandler.java |   98 +-
 .../microsoft/ooxml/xps/XPSTextExtractor.java      |    7 +-
 .../xslf/XSLFEventBasedPowerPointExtractor.java    |   16 +-
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    |   63 +-
 .../microsoft/ooxml/xwpf/XWPFStylesShim.java       |   28 +-
 .../ooxml/xwpf/ml2006/AbstractPartHandler.java     |   11 +-
 .../ooxml/xwpf/ml2006/BinaryDataHandler.java       |   12 +-
 .../ooxml/xwpf/ml2006/CorePropertiesHandler.java   |   17 +-
 .../xwpf/ml2006/ExtendedPropertiesHandler.java     |    3 +-
 .../microsoft/ooxml/xwpf/ml2006/PartHandler.java   |    7 +-
 .../ooxml/xwpf/ml2006/RelationshipsHandler.java    |    5 +-
 .../ooxml/xwpf/ml2006/RelationshipsManager.java    |    3 +-
 .../ooxml/xwpf/ml2006/Word2006MLDocHandler.java    |   40 +-
 .../ooxml/xwpf/ml2006/Word2006MLParser.java        |   21 +-
 .../ml2006/WordAndPowerPointTextPartHandler.java   |   17 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |   62 +-
 .../parser/microsoft/rtf/RTFEmbObjHandler.java     |   40 +-
 .../parser/microsoft/rtf/RTFObjDataParser.java     |   62 +-
 .../tika/parser/microsoft/rtf/RTFParser.java       |   53 +-
 .../tika/parser/microsoft/rtf/TextExtractor.java   |   93 +-
 .../microsoft/xml/AbstractXML2003Parser.java       |   41 +-
 .../parser/microsoft/xml/HyperlinkHandler.java     |   23 +-
 .../parser/microsoft/xml/SpreadsheetMLParser.java  |   50 +-
 .../tika/parser/microsoft/xml/WordMLParser.java    |   75 +-
 .../AbstractPOIContainerExtractionTest.java        |   16 +-
 .../tika/parser/microsoft/EMFParserTest.java       |   13 +-
 .../tika/parser/microsoft/ExcelParserTest.java     |   69 +-
 .../tika/parser/microsoft/JackcessParserTest.java  |   40 +-
 .../parser/microsoft/MSOwnerFileParserTest.java    |    7 +-
 .../tika/parser/microsoft/OfficeParserTest.java    |    4 +-
 .../tika/parser/microsoft/OldExcelParserTest.java  |   13 +-
 .../tika/parser/microsoft/OutlookParserTest.java   |   83 +-
 .../microsoft/POIContainerExtractionTest.java      |   19 +-
 .../parser/microsoft/PowerPointParserTest.java     |   47 +-
 .../tika/parser/microsoft/ProjectParserTest.java   |   27 +-
 .../tika/parser/microsoft/PublisherParserTest.java |   13 +-
 .../parser/microsoft/SolidworksParserTest.java     |   46 +-
 .../tika/parser/microsoft/TNEFParserTest.java      |    9 +-
 .../tika/parser/microsoft/VisioParserTest.java     |   13 +-
 .../tika/parser/microsoft/WMFParserTest.java       |    9 +-
 .../tika/parser/microsoft/WordParserTest.java      |  123 +-
 .../parser/microsoft/WriteProtectedParserTest.java |    9 +-
 .../parser/microsoft/chm/TestChmBlockInfo.java     |   50 +-
 .../parser/microsoft/chm/TestChmExtraction.java    |  161 +-
 .../parser/microsoft/chm/TestChmExtractor.java     |   16 +-
 .../parser/microsoft/chm/TestChmItsfHeader.java    |   40 +-
 .../parser/microsoft/chm/TestChmItspHeader.java    |   60 +-
 .../tika/parser/microsoft/chm/TestChmLzxState.java |   37 +-
 .../microsoft/chm/TestChmLzxcControlData.java      |   54 +-
 .../microsoft/chm/TestChmLzxcResetTable.java       |   59 +-
 .../microsoft/chm/TestDirectoryListingEntry.java   |    9 +-
 .../tika/parser/microsoft/chm/TestParameters.java  |   34 +-
 .../tika/parser/microsoft/chm/TestPmglHeader.java  |   24 +-
 .../microsoft/onenote/OneNoteParserTest.java       |   83 +-
 .../ooxml/OOXMLContainerExtractionTest.java        |   24 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |  274 +--
 .../parser/microsoft/ooxml/SXSLFExtractorTest.java |  122 +-
 .../parser/microsoft/ooxml/SXWPFExtractorTest.java |   97 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |   41 +-
 .../parser/microsoft/ooxml/xps/XPSParserTest.java  |   68 +-
 .../ooxml/xwpf/ml2006/Word2006MLParserTest.java    |   29 +-
 .../parser/microsoft/pst/OutlookPSTParserTest.java |   84 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |   91 +-
 .../parser/microsoft/xml/XML2003ParserTest.java    |   35 +-
 .../test-documents/testXPSWithDataDescriptor.xps   |  Bin 0 -> 44523 bytes
 .../test-documents/testXPSWithDataDescriptor2.xps  |  Bin 0 -> 51175 bytes
 .../apache/tika/detect/ole/MiscOLEDetector.java    |   69 +-
 .../java/org/apache/tika/parser/dbf/DBFCell.java   |   30 +-
 .../apache/tika/parser/dbf/DBFColumnHeader.java    |   68 +-
 .../org/apache/tika/parser/dbf/DBFFileHeader.java  |   46 +-
 .../java/org/apache/tika/parser/dbf/DBFParser.java |   34 +-
 .../java/org/apache/tika/parser/dbf/DBFReader.java |  167 +-
 .../java/org/apache/tika/parser/dbf/DBFRow.java    |   16 +-
 .../apache/tika/parser/dif/DIFContentHandler.java  |  241 +-
 .../java/org/apache/tika/parser/dif/DIFParser.java |   85 +-
 .../apache/tika/parser/epub/EpubContentParser.java |   29 +-
 .../org/apache/tika/parser/epub/EpubParser.java    |  124 +-
 .../apache/tika/parser/hwp/HwpStreamReader.java    |    2 +-
 .../apache/tika/parser/hwp/HwpTextExtractorV5.java |  111 +-
 .../org/apache/tika/parser/hwp/HwpV5Parser.java    |   16 +-
 .../apache/tika/parser/mif/MIFContentHandler.java  |   17 +-
 .../org/apache/tika/parser/mif/MIFExtractor.java   |   34 +-
 .../java/org/apache/tika/parser/mif/MIFParser.java |   42 +-
 .../parser/odf/FlatOpenDocumentMacroHandler.java   |   43 +-
 .../tika/parser/odf/FlatOpenDocumentParser.java    |  101 +-
 .../parser/odf/NSNormalizerContentHandler.java     |   29 +-
 .../tika/parser/odf/OpenDocumentBodyHandler.java   |  311 +--
 .../tika/parser/odf/OpenDocumentContentParser.java |   40 +-
 .../tika/parser/odf/OpenDocumentMacroHandler.java  |   16 +-
 .../parser/odf/OpenDocumentManifestHandler.java    |   35 +-
 .../tika/parser/odf/OpenDocumentMetaParser.java    |  101 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |  156 +-
 .../tika/parser/wordperfect/QPWTextExtractor.java  |  251 +-
 .../tika/parser/wordperfect/QuattroProParser.java  |   34 +-
 .../tika/parser/wordperfect/WP5Charsets.java       |  289 ++-
 .../wordperfect/WP5DocumentAreaExtractor.java      |   66 +-
 .../tika/parser/wordperfect/WP6Charsets.java       |  750 +++---
 .../wordperfect/WP6DocumentAreaExtractor.java      |   58 +-
 .../wordperfect/WPDocumentAreaExtractor.java       |   23 +-
 .../tika/parser/wordperfect/WPInputStream.java     |   25 +-
 .../tika/parser/wordperfect/WPPrefixArea.java      |   37 +-
 .../parser/wordperfect/WPPrefixAreaExtractor.java  |   10 +-
 .../tika/parser/wordperfect/WordPerfectParser.java |   78 +-
 .../org/apache/tika/parser/dbf/DBFParserTest.java  |   36 +-
 .../org/apache/tika/parser/dif/DIFParserTest.java  |   25 +-
 .../apache/tika/parser/epub/EpubParserTest.java    |   30 +-
 .../apache/tika/parser/hwp/HwpV5ParserTest.java    |   17 +-
 .../tika/parser/ibooks/iBooksParserTest.java       |   18 +-
 .../org/apache/tika/parser/mif/MIFParserTest.java  |    9 +-
 .../org/apache/tika/parser/odf/ODFParserTest.java  |  265 +-
 .../tika/parser/wordperfect/QuattroProTest.java    |   12 +-
 .../tika/parser/wordperfect/WPInputStreamTest.java |   14 +-
 .../tika/parser/wordperfect/WordPerfectTest.java   |   20 +-
 .../resources/test-documents/testODTEncrypted.odt  |  Bin 0 -> 12714 bytes
 .../org/apache/tika/parser/feed/FeedParser.java    |   76 +-
 .../apache/tika/parser/iptc/IptcAnpaParser.java    | 1404 +++++------
 .../apache/tika/parser/feed/FeedParserTest.java    |   23 +-
 .../apache/tika/parser/ocr/ImagePreprocessor.java  |   67 +-
 .../apache/tika/parser/ocr/TesseractOCRConfig.java |  197 +-
 .../apache/tika/parser/ocr/TesseractOCRParser.java |  364 ++-
 .../apache/tika/parser/ocr/tess4j/ImageDeskew.java |   10 +-
 .../apache/tika/parser/ocr/tess4j/ImageUtil.java   |   17 +-
 .../tika/parser/ocr/TesseractOCRConfigTest.java    |  149 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java    |  101 +-
 .../resources/test-configs/TIKA-2705-tesseract.xml |   26 +-
 .../tika-config-tesseract-arbitrary.xml            |   22 +-
 .../test-configs/tika-config-tesseract-full.xml    |   38 +-
 .../tika-config-tesseract-load-langs.xml           |   20 +-
 .../test-configs/tika-config-tesseract-partial.xml |   32 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  484 ++--
 .../org/apache/tika/parser/pdf/AccessChecker.java  |   18 +-
 .../tika/parser/pdf/ImageGraphicsEngine.java       |  290 ++-
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |   20 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |   97 +-
 .../tika/parser/pdf/PDFEncodedStringDecoder.java   |    6 +-
 .../tika/parser/pdf/PDFMarkedContent2XHTML.java    |  207 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  133 +-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  360 +--
 .../apache/tika/parser/pdf/PDFPreflightParser.java |   82 +-
 .../tika/parser/pdf/PDMetadataExtractor.java       |   54 +-
 .../org/apache/tika/parser/pdf/XFAExtractor.java   |   75 +-
 .../apache/tika/parser/pdf/AccessCheckerTest.java  |    6 +-
 .../parser/pdf/PDFMarkedContent2XHTMLTest.java     |   21 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  324 +--
 .../tika/parser/pdf/PDFPreflightParserTest.java    |   18 +-
 .../testPDF_deeplyEmbeddedAttachments.pdf          |  Bin 0 -> 122221 bytes
 .../apache/tika/parser/pkg/CompressorParser.java   |   54 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |  223 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |   30 +-
 .../apache/tika/parser/pkg/AbstractPkgTest.java    |   94 +-
 .../org/apache/tika/parser/pkg/ArParserTest.java   |   11 +-
 .../apache/tika/parser/pkg/Bzip2ParserTest.java    |   37 +-
 .../apache/tika/parser/pkg/CompressParserTest.java |   39 +-
 .../tika/parser/pkg/CompressorParserTest.java      |   21 +-
 .../org/apache/tika/parser/pkg/GzipParserTest.java |   29 +-
 .../apache/tika/parser/pkg/PackageParserTest.java  |   19 +-
 .../org/apache/tika/parser/pkg/RarParserTest.java  |   99 +-
 .../apache/tika/parser/pkg/Seven7ParserTest.java   |   69 +-
 .../org/apache/tika/parser/pkg/TarParserTest.java  |   67 +-
 .../org/apache/tika/parser/pkg/ZipParserTest.java  |   98 +-
 .../org/apache/tika/parser/pkg/ZlibParserTest.java |   33 +-
 .../src/test/resources/test-documents/testSVG.svg  |    8 +-
 .../java/org/apache/tika/parser/csv/CSVParams.java |    4 +-
 .../java/org/apache/tika/parser/csv/CSVResult.java |   17 +-
 .../org/apache/tika/parser/csv/CSVSniffer.java     |   84 +-
 .../apache/tika/parser/csv/TextAndCSVParser.java   |  169 +-
 .../tika/parser/strings/Latin1StringsParser.java   |  145 +-
 .../apache/tika/parser/strings/StringsConfig.java  |  163 +-
 .../tika/parser/strings/StringsEncoding.java       |   62 +-
 .../apache/tika/parser/strings/StringsParser.java  |  495 ++--
 .../apache/tika/parser/txt/CharsetDetector.java    |   46 +-
 .../org/apache/tika/parser/txt/CharsetMatch.java   |   10 +-
 .../apache/tika/parser/txt/CharsetRecog_2022.java  |   20 +-
 .../apache/tika/parser/txt/CharsetRecog_UTF8.java  |    8 +-
 .../tika/parser/txt/CharsetRecog_Unicode.java      |    2 +-
 .../apache/tika/parser/txt/CharsetRecog_mbcs.java  |  113 +-
 .../apache/tika/parser/txt/CharsetRecog_sbcs.java  | 1801 ++++++++------
 .../apache/tika/parser/txt/CharsetRecognizer.java  |    2 +-
 .../tika/parser/txt/Icu4jEncodingDetector.java     |   17 +-
 .../java/org/apache/tika/parser/txt/TXTParser.java |   26 +-
 .../tika/parser/txt/UniversalEncodingDetector.java |   16 +-
 .../tika/parser/txt/UniversalEncodingListener.java |   21 +-
 .../org/apache/tika/parser/csv/CSVSnifferTest.java |   51 +-
 .../tika/parser/csv/TextAndCSVParserTest.java      |  112 +-
 .../parser/strings/Latin1StringsParserTest.java    |   28 +-
 .../tika/parser/strings/StringsConfigTest.java     |  111 +-
 .../tika/parser/strings/StringsParserTest.java     |   89 +-
 .../tika/parser/txt/CharsetDetectorTest.java       |   21 +-
 .../org/apache/tika/parser/txt/TXTParserTest.java  |  126 +-
 .../test-configs/tika-config-strings-full.xml      |   18 +-
 .../test-configs/tika-config-strings-partial.xml   |   16 +-
 .../src/test/resources/test-documents/resume.html  |  140 +-
 .../tika/parser/xliff/XLIFF12ContentHandler.java   |   15 +-
 .../apache/tika/parser/xliff/XLIFF12Parser.java    |   27 +-
 .../org/apache/tika/parser/xliff/XLZParser.java    |   43 +-
 .../tika/parser/xml/AbstractMetadataHandler.java   |   46 +-
 .../xml/AttributeDependantMetadataHandler.java     |   34 +-
 .../tika/parser/xml/AttributeMetadataHandler.java  |   28 +-
 .../org/apache/tika/parser/xml/DcXMLParser.java    |   22 +-
 .../tika/parser/xml/ElementMetadataHandler.java    |   69 +-
 .../apache/tika/parser/xml/FictionBookParser.java  |   33 +-
 .../apache/tika/parser/xml/MetadataHandler.java    |   33 +-
 .../tika/parser/xml/TextAndAttributeXMLParser.java |    6 +-
 .../java/org/apache/tika/parser/xml/XMLParser.java |   39 +-
 .../org/apache/tika/parser/xml/XMLProfiler.java    |   99 +-
 .../tika/parser/xliff/XLIFF12ParserTest.java       |    5 +-
 .../apache/tika/parser/xliff/XLZParserTest.java    |   18 +-
 .../apache/tika/parser/xml/DcXMLParserTest.java    |   27 +-
 .../EmptyAndDuplicateElementsXMLParserTest.java    |   56 +-
 .../tika/parser/xml/FictionBookParserTest.java     |   10 +-
 .../parser/xml/TextAndAttributeXMLParserTest.java  |   21 +-
 .../src/test/resources/test-documents/testXML.xml  |   30 +-
 .../src/test/resources/test-documents/testXML2.xml |   10 +-
 .../src/test/resources/test-documents/testXML3.xml |   38 +-
 .../apache/tika/parser/xmp/JempboxExtractor.java   |   91 +-
 .../apache/tika/parser/xmp/XMPPacketScanner.java   |    4 +-
 .../tika/parser/xmp/JempboxExtractorTest.java      |   31 +-
 .../src/test/resources/test-documents/testXMP.xmp  |  342 ++-
 .../tika/detect/zip/CompressorConstants.java       |    3 +-
 .../detect/zip/DefaultZipContainerDetector.java    |  165 +-
 .../DeprecatedStreamingZipContainerDetector.java   |   37 +-
 .../detect/zip/DeprecatedZipContainerDetector.java |    3 -
 .../org/apache/tika/detect/zip/IPADetector.java    |   21 +-
 .../org/apache/tika/detect/zip/JarDetector.java    |   14 +-
 .../org/apache/tika/detect/zip/KMZDetector.java    |   22 +-
 .../tika/detect/zip/OpenDocumentDetector.java      |   20 +-
 .../apache/tika/detect/zip/PackageConstants.java   |    1 +
 .../apache/tika/detect/zip/StarOfficeDetector.java |   78 +-
 .../tika/detect/zip/StreamingDetectContext.java    |   16 +-
 .../detect/zip/StreamingZipContainerDetector.java  |   13 +-
 .../tika/detect/zip/ZipContainerDetector.java      |   16 +-
 .../tika/detect/zip/ZipContainerDetectorBase.java  |   47 +-
 .../org/apache/tika/zip/utils/ZipSalvager.java     |  104 +-
 .../org/apache/tika/detect/zip/ZipParserTest.java  |   14 +-
 .../org/apache/tika/parser/internal/Activator.java |   22 +-
 .../apache/tika/config/TikaDetectorConfigTest.java |   87 +-
 .../tika/config/TikaEncodingDetectorTest.java      |   82 +-
 .../apache/tika/config/TikaParserConfigTest.java   |   69 +-
 .../tika/config/TikaTranslatorConfigTest.java      |   21 +-
 .../tika/detect/TestContainerAwareDetector.java    |  278 ++-
 .../apache/tika/detect/TestDetectorLoading.java    |   15 +-
 .../tika/detect/TestFileCommandDetector.java       |   12 +-
 .../tika/extractor/EmbeddedDocumentUtilTest.java   |    3 +-
 .../java/org/apache/tika/mime/MimeTypeTest.java    |   12 +-
 .../java/org/apache/tika/mime/MimeTypesTest.java   |    4 +-
 .../java/org/apache/tika/mime/TestMimeTypes.java   |  733 +++---
 .../apache/tika/parser/AutoDetectParserTest.java   |  357 ++-
 .../tika/parser/AutoDetectReaderParserTest.java    |   24 +-
 .../parser/BouncyCastleDigestingParserTest.java    |  125 +-
 .../apache/tika/parser/DigestingParserTest.java    |  120 +-
 .../org/apache/tika/parser/ParsingReaderTest.java  |   13 +-
 .../tika/parser/RecursiveParserWrapperTest.java    |   61 +-
 .../org/apache/tika/parser/TabularFormatsTest.java |  252 +-
 .../java/org/apache/tika/parser/TestParsers.java   |   49 +-
 .../apache/tika/parser/TestXMLEntityExpansion.java |   90 +-
 .../java/org/apache/tika/parser/TestXXEInXML.java  |  115 +-
 .../java/org/apache/tika/parser/XMLTestBase.java   |   80 +-
 .../parser/apple/AppleSingleFileParserTest.java    |    8 +-
 .../apache/tika/parser/apple/PListParserTest.java  |   11 +-
 .../apache/tika/parser/crypto/TSDParserTest.java   |   19 +-
 .../parser/fork/ForkParserIntegrationTest.java     |  285 +--
 .../apache/tika/parser/html/HtmlParserTest.java    |   20 +-
 .../apache/tika/parser/mail/MboxParserTest.java    |   16 +-
 .../apache/tika/parser/mail/RFC822ParserTest.java  |   42 +-
 .../tika/parser/microsoft/EMFParserTest.java       |   17 +-
 .../tika/parser/microsoft/ExcelParserTest.java     |    7 +-
 .../microsoft/POIContainerExtractionTest.java      |    9 +-
 .../parser/microsoft/PowerPointParserTest.java     |   14 +-
 .../tika/parser/microsoft/XML2003ParserTest.java   |   25 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |   12 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |   17 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |   65 +-
 .../apache/tika/parser/mock/MockParserTest.java    |   70 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java    |   54 +-
 .../org/apache/tika/parser/odf/ODFParserTest.java  |   54 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  100 +-
 .../org/apache/tika/parser/pkg/ArParserTest.java   |   17 +-
 .../apache/tika/parser/pkg/Bzip2ParserTest.java    |   11 +-
 .../pkg/CompositeZipContainerDetectorTest.java     |  141 +-
 .../apache/tika/parser/pkg/CompressParserTest.java |   25 +-
 .../tika/parser/pkg/CompressorParserTest.java      |   17 +-
 .../org/apache/tika/parser/pkg/GzipParserTest.java |   13 +-
 .../org/apache/tika/parser/pkg/RarParserTest.java  |   16 +-
 .../apache/tika/parser/pkg/Seven7ParserTest.java   |   64 +-
 .../org/apache/tika/parser/pkg/TarParserTest.java  |   11 +-
 .../org/apache/tika/parser/pkg/ZipParserTest.java  |   65 +-
 .../org/apache/tika/parser/pkg/ZlibParserTest.java |   11 +-
 .../tika/parser/xml/FictionBookParserTest.java     |   14 +-
 .../sax/PhoneExtractingContentHandlerTest.java     |   20 +-
 .../sax/StandardsExtractingContentHandlerTest.java |   47 +-
 .../apache/tika/utils/ServiceLoaderUtilsTest.java  |    6 +-
 .../src/test/resources/log4j.properties            |    1 +
 .../test-documents/testJAVAPROPS.properties        |    1 +
 .../apache/tika/parser/envi/EnviHeaderParser.java  |  112 +-
 .../org/apache/tika/parser/gdal/GDALParser.java    |  249 +-
 .../geoinfo/GeographicInformationParser.java       |  451 ++--
 .../org/apache/tika/parser/grib/GribParser.java    |   41 +-
 .../java/org/apache/tika/parser/hdf/HDFParser.java |   32 +-
 .../org/apache/tika/parser/isatab/ISATabUtils.java |  311 +--
 .../apache/tika/parser/isatab/ISArchiveParser.java |  222 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java    |   28 +-
 .../tika/parser/envi/EnviHeaderParserTest.java     |   59 +-
 .../apache/tika/parser/gdal/TestGDALParser.java    |  112 +-
 .../geoinfo/GeographicInformationParserTest.java   |    5 +-
 .../apache/tika/parser/grib/GribParserTest.java    |   17 +-
 .../org/apache/tika/parser/hdf/HDFParserTest.java  |   48 +-
 .../tika/parser/isatab/ISArchiveParserTest.java    |   68 +-
 .../tika/parser/netcdf/NetCDFParserTest.java       |   23 +-
 .../ground-truth/EnviHeaderGroundTruth.txt         |    1 +
 .../tika/parser/sqlite3/SQLite3DBParser.java       |   19 +-
 .../apache/tika/parser/sqlite3/SQLite3Parser.java  |   17 +-
 .../tika/parser/sqlite3/SQLite3TableReader.java    |   19 +-
 .../tika/parser/sqlite3/SQLite3ParserTest.java     |   37 +-
 .../apache/tika/mime/TestMimeTypesExtended.java    |   23 +-
 .../tika/parser/sqlite3/SQLite3ParserTest.java     |   68 +-
 tika-server/pom.xml                                |   38 +-
 .../server/classic/config/PDFServerConfig.java     |   42 +-
 .../classic/config/TesseractServerConfig.java      |   41 +-
 .../classic/resource/XMPMetadataResource.java      |   34 +-
 .../classic/writer/XMPMessageBodyWriter.java       |   26 +-
 .../src/main/resources/log4j.properties            |    4 +-
 .../tika/server/classic/DetectorResourceTest.java  |   55 +-
 .../apache/tika/server/classic/FetcherTest.java    |   35 +-
 .../tika/server/classic/MetadataResourceTest.java  |   96 +-
 .../classic/RecursiveMetadataFilterTest.java       |   42 +-
 .../classic/RecursiveMetadataResourceTest.java     |  215 +-
 .../tika/server/classic/TikaDetectorsTest.java     |   41 +-
 .../tika/server/classic/TikaMimeTypesTest.java     |   39 +-
 .../tika/server/classic/TikaParsersTest.java       |   46 +-
 .../tika/server/classic/TikaResourceTest.java      |  418 ++--
 .../tika/server/classic/UnpackerResourceTest.java  |   91 +-
 .../test/resources/config/TIKA-3137-include.xml    |   38 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../test/resources/test-documents/testHTML.html    |   20 +-
 .../org/apache/tika/server/client/TikaClient.java  |   33 +-
 .../apache/tika/server/client/TikaClientCLI.java   |   70 +-
 .../tika/server/client/TikaEmitterResult.java      |   24 +-
 .../apache/tika/server/client/TikaHttpClient.java  |   79 +-
 .../src/main/resources/log4j.properties            |    6 +-
 .../org/apache/tika/server/client/TestBasic.java   |   15 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../server/core/CompositeParseContextConfig.java   |   10 +-
 .../server/core/DefaultInputStreamFactory.java     |    9 +-
 .../tika/server/core/FetcherStreamFactory.java     |   17 +-
 .../org/apache/tika/server/core/HTMLHelper.java    |    7 +-
 .../tika/server/core/InputStreamFactory.java       |    9 +-
 .../org/apache/tika/server/core/MetadataList.java  |    7 +-
 .../tika/server/core/ParseContextConfig.java       |   16 +-
 .../org/apache/tika/server/core/ServerStatus.java  |  122 +-
 .../tika/server/core/ServerStatusWatcher.java      |   57 +-
 .../apache/tika/server/core/TikaLoggingFilter.java |    7 +-
 .../org/apache/tika/server/core/TikaServerCli.java |   39 +-
 .../apache/tika/server/core/TikaServerConfig.java  |  543 ++---
 .../tika/server/core/TikaServerParseException.java |    3 +-
 .../core/TikaServerParseExceptionMapper.java       |   12 +-
 .../apache/tika/server/core/TikaServerProcess.java |  194 +-
 .../tika/server/core/TikaServerWatchDog.java       |  222 +-
 .../apache/tika/server/core/WatchDogResult.java    |    7 +-
 .../server/core/config/DocumentSelectorConfig.java |   10 +-
 .../server/core/config/PasswordProviderConfig.java |   27 +-
 .../tika/server/core/resource/AsyncEmitter.java    |   30 +-
 .../tika/server/core/resource/AsyncParser.java     |   42 +-
 .../tika/server/core/resource/AsyncRequest.java    |    4 +-
 .../tika/server/core/resource/AsyncResource.java   |   56 +-
 .../server/core/resource/DetectorResource.java     |   21 +-
 .../tika/server/core/resource/EmitterResource.java |  133 +-
 .../server/core/resource/LanguageResource.java     |   55 +-
 .../server/core/resource/MetadataResource.java     |   62 +-
 .../core/resource/RecursiveMetadataResource.java   |  131 +-
 .../tika/server/core/resource/TikaDetectors.java   |   15 +-
 .../tika/server/core/resource/TikaMimeTypes.java   |   37 +-
 .../tika/server/core/resource/TikaParsers.java     |   43 +-
 .../tika/server/core/resource/TikaResource.java    |  243 +-
 .../server/core/resource/TikaServerStatus.java     |    8 +-
 .../tika/server/core/resource/TikaWelcome.java     |   44 +-
 .../server/core/resource/TranslateResource.java    |  135 +-
 .../server/core/resource/UnpackerResource.java     |   84 +-
 .../server/core/writer/CSVMessageBodyWriter.java   |   29 +-
 .../server/core/writer/JSONMessageBodyWriter.java  |   29 +-
 .../tika/server/core/writer/JSONObjWriter.java     |   30 +-
 .../core/writer/MetadataListMessageBodyWriter.java |   29 +-
 .../apache/tika/server/core/writer/TarWriter.java  |   25 +-
 .../server/core/writer/TextMessageBodyWriter.java  |   28 +-
 .../apache/tika/server/core/writer/ZipWriter.java  |   27 +-
 .../src/main/resources/tikaserver-template.html    |   18 +-
 .../main/resources/tikaserver-version.properties   |   15 +
 .../org/apache/tika/server/core/CXFTestBase.java   |   84 +-
 .../tika/server/core/IntegrationTestBase.java      |   92 +-
 .../tika/server/core/LanguageResourceTest.java     |  139 +-
 .../tika/server/core/NullWebClientLogger.java      |    5 +-
 .../apache/tika/server/core/ServerStatusTest.java  |   16 +-
 .../apache/tika/server/core/StackTraceOffTest.java |   54 +-
 .../apache/tika/server/core/StackTraceTest.java    |   62 +-
 .../apache/tika/server/core/TikaEmitterTest.java   |  162 +-
 .../apache/tika/server/core/TikaMimeTypesTest.java |   34 +-
 .../apache/tika/server/core/TikaResourceTest.java  |   60 +-
 .../core/TikaServerAsyncIntegrationTest.java       |  123 +-
 .../tika/server/core/TikaServerConfigTest.java     |   32 +-
 .../core/TikaServerEmitterIntegrationTest.java     |  147 +-
 .../server/core/TikaServerIntegrationTest.java     |  217 +-
 .../tika/server/core/TikaServerStatusTest.java     |   29 +-
 .../apache/tika/server/core/TikaVersionTest.java   |   20 +-
 .../apache/tika/server/core/TikaWelcomeTest.java   |   54 +-
 .../tika/server/core/TranslateResourceTest.java    |   90 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../test-documents/mock/heavy_hang_100.xml         |    2 +-
 .../test-documents/mock/heavy_hang_30000.xml       |    2 +-
 .../resources/test-documents/mock/system_exit.xml  |    2 +-
 .../test-documents/mock/testStaticStdOutErr.xml    |   45 +-
 .../test-documents/mock/testStdOutErr.xml          |   45 +-
 .../test-documents/mock/thread_interrupt.xml       |    2 +-
 1006 files changed, 38548 insertions(+), 37878 deletions(-)
 create mode 100644 .github/pull_request_template.md
 copy tika-batch/src/test/resources/log4j.properties => .github/workflows/main-build.yml (59%)
 copy tika-core/src/main/java/org/apache/tika/exception/{package-info.java => WriteLimitReached.java} (90%)
 create mode 100644 tika-parent/checkstyle.xml
 create mode 100644 tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXPSWithDataDescriptor.xps
 create mode 100644 tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXPSWithDataDescriptor2.xps
 copy tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java => tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentManifestHandler.java (53%)
 create mode 100644 tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/test/resources/test-documents/testODTEncrypted.odt
 create mode 100644 tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_deeplyEmbeddedAttachments.pdf
 create mode 100644 tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/test/resources/ground-truth/EnviHeaderGroundTruth.txt

[tika] 02/02: merge from main and required updates/conflict resolution

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3304
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 5dbffcd8eaae9135b509cf05a13d17c5542e2fc9
Author: tballison <ta...@apache.org>
AuthorDate: Wed Mar 24 14:26:41 2021 -0400

    merge from main and required updates/conflict resolution
---
 .gitignore                                         |  1 +
 .../tika/pipes/fetchiterator/FetchIterator.java    | 46 ++++++++++++----------
 .../org/apache/tika/server/client/TikaClient.java  |  2 +-
 .../apache/tika/server/client/TikaClientCLI.java   | 35 ++++++++++++++--
 .../tika/server/client/TikaEmitterResult.java      |  7 ++--
 .../apache/tika/server/client/TikaHttpClient.java  |  6 +--
 .../tika/server/core/resource/AsyncEmitter.java    |  2 +-
 .../tika/server/core/resource/EmitterResource.java | 23 ++---------
 8 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/.gitignore b/.gitignore
index dda6180..c608dc2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .svn
 target
 dependency-reduced-pom.xml
+.editorconfig
 .idea
 .classpath
 .project
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java b/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java
index dde8222..2b9273d 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java
@@ -17,18 +17,24 @@
 package org.apache.tika.pipes.fetchiterator;
 
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.FutureTask;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.tika.config.Field;
 import org.apache.tika.config.Initializable;
 import org.apache.tika.config.InitializableProblemHandler;
 import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
-
+import org.apache.tika.exception.TikaTimeoutException;
 /**
  * Abstract class that handles the testing for timeouts/thread safety
  * issues.  Concrete classes implement the blocking {@link #enqueue()}.
@@ -36,14 +42,13 @@ import org.apache.tika.exception.TikaConfigException;
  * a RuntimeException.  It will throw an IllegalStateException if
  * next() is called after hasNext() has returned false.
  */
-public abstract class FetchIterator implements Callable<Integer>,
-        Iterable<FetchEmitTuple>, Initializable {
+public abstract class FetchIterator
+        implements Callable<Integer>, Iterable<FetchEmitTuple>, Initializable {
 
     public static final long DEFAULT_MAX_WAIT_MS = 300_000;
     public static final int DEFAULT_QUEUE_SIZE = 1000;
 
-    public static final FetchEmitTuple COMPLETED_SEMAPHORE =
-            new FetchEmitTuple(null, null, null);
+    public static final FetchEmitTuple COMPLETED_SEMAPHORE = new FetchEmitTuple(null, null, null);
 
     private static final Logger LOGGER = LoggerFactory.getLogger(FetchIterator.class);
 
@@ -53,16 +58,21 @@ public abstract class FetchIterator implements Callable<Integer>,
     private String fetcherName;
     private String emitterName;
     private int added = 0;
-    private FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
+    private FetchEmitTuple.ON_PARSE_EXCEPTION onParseException =
+            FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
     private FutureTask<Integer> futureTask;
 
+    public String getFetcherName() {
+        return fetcherName;
+    }
+
     @Field
     public void setFetcherName(String fetcherName) {
         this.fetcherName = fetcherName;
     }
 
-    public String getFetcherName() {
-        return fetcherName;
+    public String getEmitterName() {
+        return emitterName;
     }
 
     @Field
@@ -70,10 +80,6 @@ public abstract class FetchIterator implements Callable<Integer>,
         this.emitterName = emitterName;
     }
 
-    public String getEmitterName() {
-        return emitterName;
-    }
-
     @Field
     public void setMaxWaitMs(long maxWaitMs) {
         this.maxWaitMs = maxWaitMs;
@@ -84,6 +90,10 @@ public abstract class FetchIterator implements Callable<Integer>,
         this.queueSize = queueSize;
     }
 
+    public FetchEmitTuple.ON_PARSE_EXCEPTION getOnParseException() {
+        return onParseException;
+    }
+
     @Field
     public void setOnParseException(String onParseException) throws TikaConfigException {
         if ("skip".equalsIgnoreCase(onParseException)) {
@@ -99,10 +109,6 @@ public abstract class FetchIterator implements Callable<Integer>,
         this.onParseException = onParseException;
     }
 
-    public FetchEmitTuple.ON_PARSE_EXCEPTION getOnParseException() {
-        return onParseException;
-    }
-
     public Integer call() throws Exception {
         enqueue();
         tryToAdd(COMPLETED_SEMAPHORE);
@@ -168,19 +174,19 @@ public abstract class FetchIterator implements Callable<Integer>,
             FetchEmitTuple t = null;
             long start = System.currentTimeMillis();
             try {
-                long elapsed = System.currentTimeMillis()-start;
+                long elapsed = System.currentTimeMillis() - start;
                 while (t == null && elapsed < maxWaitMs) {
                     checkThreadOk();
                     t = queue.poll(100, TimeUnit.MILLISECONDS);
-                    elapsed = System.currentTimeMillis()-start;
+                    elapsed = System.currentTimeMillis() - start;
                 }
             } catch (InterruptedException e) {
                 LOGGER.warn("interrupted");
                 return COMPLETED_SEMAPHORE;
             }
             if (t == null) {
-                throw new TikaTimeoutException("waited longer than "+
-                        maxWaitMs+"ms for the next tuple");
+                throw new TikaTimeoutException(
+                        "waited longer than " + maxWaitMs + "ms for the next tuple");
             }
             return t;
         }
diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
index 3b1fb25..a9f7319 100644
--- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
+++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
@@ -31,7 +31,7 @@ import org.apache.tika.pipes.fetchiterator.FetchEmitTuple;
 public class TikaClient {
 
     private final Random random = new Random();
-    private List<TikaHttpClient> clients;
+    private final List<TikaHttpClient> clients;
 
 
     private TikaClient(List<TikaHttpClient> clients) {
diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
index b959839..5e583db 100644
--- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
+++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
@@ -45,8 +45,9 @@ import org.apache.tika.pipes.fetchiterator.FetchIterator;
 public class TikaClientCLI {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(TikaClientCLI.class);
+    private static final int QUEUE_SIZE = 10000;
 
-    private long maxWaitMs = 300000;
+    private final long maxWaitMs = 300000;
 
     public static void main(String[] args) throws Exception {
         //TODO -- add an actual commandline,
@@ -65,9 +66,10 @@ public class TikaClientCLI {
         ExecutorCompletionService<Integer> completionService =
                 new ExecutorCompletionService<>(executorService);
         final FetchIterator fetchIterator = config.getFetchIterator();
-        final ArrayBlockingQueue<FetchEmitTuple> queue = fetchIterator.init(numThreads);
+        final ArrayBlockingQueue<FetchEmitTuple> queue =
+                new ArrayBlockingQueue<>(QUEUE_SIZE);
 
-        completionService.submit(fetchIterator);
+        completionService.submit(new FetchIteratorWrapper(fetchIterator, queue, numThreads));
         if (tikaServerUrls.size() == numThreads) {
             logDiffSizes(tikaServerUrls.size(), numThreads);
             for (int i = 0; i < numThreads; i++) {
@@ -179,4 +181,31 @@ public class TikaClientCLI {
             }
         }
     }
+
+    private class FetchIteratorWrapper implements Callable<Integer> {
+        private final FetchIterator fetchIterator;
+        private final ArrayBlockingQueue<FetchEmitTuple> queue;
+        private final int numThreads;
+
+        public FetchIteratorWrapper(FetchIterator fetchIterator,
+                                    ArrayBlockingQueue<FetchEmitTuple> queue,
+                                    int numThreads) {
+            this.fetchIterator = fetchIterator;
+            this.queue = queue;
+            this.numThreads = numThreads;
+
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            for (FetchEmitTuple t : fetchIterator) {
+                //potentially blocks forever
+                queue.put(t);
+            }
+            for (int i = 0; i < numThreads; i ++) {
+                queue.put(FetchIterator.COMPLETED_SEMAPHORE);
+            }
+            return 1;
+        }
+    }
 }
diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
index 1438885..4cb134c 100644
--- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
+++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
@@ -19,9 +19,10 @@ package org.apache.tika.server.client;
 public class TikaEmitterResult {
 
 
-    private STATUS status;
-    private String msg;//used for exceptions. will be null for status ok
-    private long timeElapsed;
+    private final STATUS status;
+    private final String msg;//used for exceptions. will be null for status ok
+    private final long timeElapsed;
+
     public TikaEmitterResult(STATUS status, long timeElapsed, String msg) {
         this.status = status;
         this.timeElapsed = timeElapsed;
diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
index 6d6517f..9a5ff7f 100644
--- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
+++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
@@ -48,11 +48,11 @@ class TikaHttpClient {
     private final String emitEndPointUrl;
     private final String asyncEndPointUrl;
     private final String tikaUrl;
-    private int maxRetries = 3;
+    private final int maxRetries = 3;
     //if can't make contact with Tika server, max wait time in ms
-    private long maxWaitForTikaMs = 120000;
+    private final long maxWaitForTikaMs = 120000;
     //how often to ping /tika (in ms) to see if the server is up and running
-    private long pulseWaitForTikaMs = 1000;
+    private final long pulseWaitForTikaMs = 1000;
 
     /**
      * @param baseUrl    url to base endpoint
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncEmitter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncEmitter.java
index 1ebe787..054a1d2 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncEmitter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncEmitter.java
@@ -97,7 +97,7 @@ public class AsyncEmitter implements Callable<Integer> {
         void add(EmitData data) {
             size++;
             long sz = AbstractEmitter
-                    .estimateSizeInBytes(data.getEmitKey().getKey(), data.getMetadataList());
+                    .estimateSizeInBytes(data.getEmitKey().getEmitKey(), data.getMetadataList());
             if (estimatedSize + sz > maxBytes) {
                 LOG.debug("estimated size ({}) > maxBytes({}), going to emitAll",
                         (estimatedSize + sz), maxBytes);
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
index 56a71db..306732a 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
@@ -64,18 +64,13 @@ public class EmitterResource {
     private static final String FETCH_KEY_ABBREV = "fk";
     private static final String EMIT_KEY_ABBREV = "ek";
 
-    /**
-     * key that is safe to pass through http header.
-     * The user _must_ specify this for the fsemitter if calling 'put'
-     */
-    public static final String EMIT_KEY_FOR_HTTP_HEADER = "emit-key";
     private static final Logger LOG = LoggerFactory.getLogger(EmitterResource.class);
 
     static EmitKey calcEmitKey(FetchEmitTuple t) {
         //use fetch key if emitter key is not specified
         //TODO: clean this up?
         EmitKey emitKey = t.getEmitKey();
-        if (StringUtils.isBlank(emitKey.getKey())) {
+        if (StringUtils.isBlank(emitKey.getEmitKey())) {
             emitKey = new EmitKey(emitKey.getEmitterName(), t.getFetchKey().getKey());
         }
         return emitKey;
@@ -197,21 +192,11 @@ public class EmitterResource {
         return emit(calcEmitKey(t), metadataList);
     }
 
-    static EmitKey calcEmitKey(FetchEmitTuple t) {
-        //use fetch key if emitter key is not specified
-        //TODO: clean this up?
-        EmitKey emitKey = t.getEmitKey();
-        if (StringUtils.isBlank(emitKey.getKey())) {
-            emitKey = new EmitKey(emitKey.getEmitterName(), t.getFetchKey().getKey());
-        }
-        return emitKey;
-    }
-
     private Map<String, String> skip(FetchEmitTuple t, List<Metadata> metadataList) {
         Map<String, String> statusMap = new HashMap<>();
         statusMap.put("status", "ok");
         statusMap.put("emitter", t.getEmitKey().getEmitterName());
-        statusMap.put("emitKey", t.getEmitKey().getKey());
+        statusMap.put("emitKey", t.getEmitKey().getEmitKey());
         String msg = metadataList.get(0).get(TikaCoreProperties.CONTAINER_EXCEPTION);
         statusMap.put("parse_exception", msg);
         return statusMap;
@@ -269,9 +254,9 @@ public class EmitterResource {
         String status = "ok";
         String exceptionMsg = "";
         try {
-            emitter.emit(emitKey.getKey(), metadataList);
+            emitter.emit(emitKey.getEmitKey(), metadataList);
         } catch (IOException | TikaEmitterException e) {
-            LOG.warn("problem emitting (" + emitKey.getKey() + ")", e);
+            LOG.warn("problem emitting (" + emitKey.getEmitterName() + ")", e);
             status = "emitter_exception";
             exceptionMsg = ExceptionUtils.getStackTrace(e);
         }

[tika] 01/02: Merge remote-tracking branch 'origin/main' into TIKA-3304

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3304
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 9416f5926af5ab7637b8afa6a787be37e1b360e6
Merge: 26ff633 d87ac65
Author: tballison <ta...@apache.org>
AuthorDate: Wed Mar 24 13:30:44 2021 -0400

    Merge remote-tracking branch 'origin/main' into TIKA-3304
    
    # Conflicts:
    #	tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    #	tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java
    #	tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
    #	tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncEmitter.java
    #	tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
    #	tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java

 .github/pull_request_template.md                   |   12 +
 .../workflows/main-build.yml                       |   33 +-
 CHANGES.txt                                        |   25 +-
 tika-core/pom.xml                                  |   30 +
 tika-core/src/main/java/org/apache/tika/Tika.java  |  143 +-
 .../concurrent/ConfigurableThreadPoolExecutor.java |   64 +-
 .../tika/concurrent/SimpleThreadPoolExecutor.java  |   82 +-
 .../main/java/org/apache/tika/config/Field.java    |    4 +-
 .../java/org/apache/tika/config/Initializable.java |   11 +-
 .../tika/config/InitializableProblemHandler.java   |   19 +-
 .../org/apache/tika/config/LoadErrorHandler.java   |   29 +-
 .../main/java/org/apache/tika/config/Param.java    |  261 +-
 .../java/org/apache/tika/config/ParamField.java    |   56 +-
 .../java/org/apache/tika/config/ServiceLoader.java |  212 +-
 .../java/org/apache/tika/config/TikaActivator.java |    5 +-
 .../java/org/apache/tika/config/TikaConfig.java    |  701 +++---
 .../apache/tika/config/TikaConfigSerializer.java   |  119 +-
 .../org/apache/tika/detect/AutoDetectReader.java   |  100 +-
 .../org/apache/tika/detect/CompositeDetector.java  |   34 +-
 .../tika/detect/CompositeEncodingDetector.java     |   22 +-
 .../org/apache/tika/detect/DefaultDetector.java    |   83 +-
 .../tika/detect/DefaultEncodingDetector.java       |   13 +-
 .../apache/tika/detect/DefaultProbDetector.java    |   32 +-
 .../main/java/org/apache/tika/detect/Detector.java |    2 +-
 .../java/org/apache/tika/detect/EmptyDetector.java |    3 +-
 .../org/apache/tika/detect/EncodingDetector.java   |    2 +-
 .../apache/tika/detect/FileCommandDetector.java    |   58 +-
 .../java/org/apache/tika/detect/MagicDetector.java |  357 ++-
 .../apache/tika/detect/NNExampleModelDetector.java |   19 +-
 .../org/apache/tika/detect/NNTrainedModel.java     |  147 +-
 .../apache/tika/detect/NNTrainedModelBuilder.java  |   77 +-
 .../java/org/apache/tika/detect/NameDetector.java  |    6 +-
 .../tika/detect/NonDetectingEncodingDetector.java  |    8 +-
 .../org/apache/tika/detect/OverrideDetector.java   |    9 +-
 .../java/org/apache/tika/detect/TextDetector.java  |   15 +-
 .../org/apache/tika/detect/TextStatistics.java     |   27 +-
 .../java/org/apache/tika/detect/TrainedModel.java  |    7 +-
 .../apache/tika/detect/TrainedModelDetector.java   |   22 +-
 .../java/org/apache/tika/detect/TypeDetector.java  |    2 +-
 .../org/apache/tika/detect/XmlRootExtractor.java   |   16 +-
 .../apache/tika/detect/ZeroSizeFileDetector.java   |   13 +-
 .../java/org/apache/tika/embedder/Embedder.java    |   37 +-
 .../org/apache/tika/embedder/ExternalEmbedder.java |  243 +-
 .../tika/exception/EncryptedDocumentException.java |    2 +-
 .../apache/tika/exception/TikaConfigException.java |    1 +
 .../org/apache/tika/exception/TikaException.java   |    2 +-
 .../tika/exception/TikaMemoryLimitException.java   |    2 +-
 .../apache/tika/exception/WriteLimitReached.java   |    9 +-
 .../tika/exception/ZeroByteFileException.java      |   15 +-
 .../apache/tika/extractor/ContainerExtractor.java  |   23 +-
 .../extractor/DefaultEmbeddedStreamTranslator.java |    8 +-
 .../apache/tika/extractor/DocumentSelector.java    |    2 +-
 .../tika/extractor/EmbeddedDocumentExtractor.java  |    7 +-
 .../tika/extractor/EmbeddedDocumentUtil.java       |   24 +-
 .../tika/extractor/EmbeddedStreamTranslator.java   |    4 +-
 .../tika/extractor/ParserContainerExtractor.java   |    8 +-
 .../ParsingEmbeddedDocumentExtractor.java          |   28 +-
 .../org/apache/tika/fork/ClassLoaderProxy.java     |   10 +-
 .../org/apache/tika/fork/ClassLoaderResource.java  |    7 +-
 .../org/apache/tika/fork/ContentHandlerProxy.java  |   54 +-
 .../apache/tika/fork/ContentHandlerResource.java   |   14 +-
 .../main/java/org/apache/tika/fork/ForkClient.java |  184 +-
 .../apache/tika/fork/ForkObjectInputStream.java    |   43 +-
 .../main/java/org/apache/tika/fork/ForkParser.java |  162 +-
 .../java/org/apache/tika/fork/ForkResource.java    |    4 +-
 .../main/java/org/apache/tika/fork/ForkServer.java |  126 +-
 .../org/apache/tika/fork/InputStreamProxy.java     |    4 +-
 .../org/apache/tika/fork/InputStreamResource.java  |    3 +-
 .../org/apache/tika/fork/MemoryURLConnection.java  |    2 +-
 .../apache/tika/fork/MemoryURLStreamHandler.java   |    4 +-
 .../tika/fork/MemoryURLStreamHandlerFactory.java   |    2 +-
 .../apache/tika/fork/MemoryURLStreamRecord.java    |    2 +-
 .../apache/tika/fork/MetadataContentHandler.java   |    6 +-
 .../org/apache/tika/fork/ParserFactoryFactory.java |   13 +-
 .../fork/RecursiveMetadataContentHandlerProxy.java |   39 +-
 .../RecursiveMetadataContentHandlerResource.java   |   40 +-
 .../org/apache/tika/io/BoundedInputStream.java     |    4 +-
 .../main/java/org/apache/tika/io/EndianUtils.java  |   42 +-
 .../java/org/apache/tika/io/FilenameUtils.java     |   30 +-
 .../src/main/java/org/apache/tika/io/IOUtils.java  |   18 +-
 .../org/apache/tika/io/InputStreamFactory.java     |   17 +-
 .../org/apache/tika/io/LookaheadInputStream.java   |   10 +-
 .../org/apache/tika/io/MappedBufferCleaner.java    |   96 +-
 .../main/java/org/apache/tika/io/TailStream.java   |  141 +-
 .../org/apache/tika/io/TemporaryResources.java     |    8 +-
 .../java/org/apache/tika/io/TikaInputStream.java   |  314 ++-
 .../apache/tika/language/LanguageIdentifier.java   |  165 +-
 .../org/apache/tika/language/LanguageProfile.java  |  106 +-
 .../tika/language/LanguageProfilerBuilder.java     |  496 ++--
 .../org/apache/tika/language/ProfilingHandler.java |    3 +-
 .../org/apache/tika/language/ProfilingWriter.java  |    2 +-
 .../tika/language/detect/LanguageConfidence.java   |    5 +-
 .../tika/language/detect/LanguageDetector.java     |  342 +--
 .../tika/language/detect/LanguageHandler.java      |   10 +-
 .../apache/tika/language/detect/LanguageNames.java |  111 +-
 .../tika/language/detect/LanguageResult.java       |  158 +-
 .../tika/language/detect/LanguageWriter.java       |   10 +-
 .../tika/language/translate/DefaultTranslator.java |   14 +-
 .../tika/language/translate/EmptyTranslator.java   |    2 +-
 .../apache/tika/language/translate/Translator.java |   22 +-
 .../apache/tika/metadata/AccessPermissions.java    |   24 +-
 .../org/apache/tika/metadata/ClimateForcast.java   |   30 +-
 .../org/apache/tika/metadata/CreativeCommons.java  |    2 +-
 .../java/org/apache/tika/metadata/Database.java    |   14 +-
 .../java/org/apache/tika/metadata/DublinCore.java  |   76 +-
 .../main/java/org/apache/tika/metadata/Font.java   |    4 +-
 .../java/org/apache/tika/metadata/Geographic.java  |   15 +-
 .../main/java/org/apache/tika/metadata/HTML.java   |    6 +-
 .../java/org/apache/tika/metadata/HttpHeaders.java |    4 +-
 .../main/java/org/apache/tika/metadata/IPTC.java   | 2527 ++++++++++----------
 .../org/apache/tika/metadata/MachineMetadata.java  |  143 +-
 .../java/org/apache/tika/metadata/Message.java     |   38 +-
 .../java/org/apache/tika/metadata/Metadata.java    |  241 +-
 .../main/java/org/apache/tika/metadata/Office.java |  211 +-
 .../apache/tika/metadata/OfficeOpenXMLCore.java    |   52 +-
 .../tika/metadata/OfficeOpenXMLExtended.java       |   65 +-
 .../main/java/org/apache/tika/metadata/PDF.java    |   76 +-
 .../java/org/apache/tika/metadata/PagedText.java   |    4 +-
 .../java/org/apache/tika/metadata/Photoshop.java   |   31 +-
 .../java/org/apache/tika/metadata/Property.java    |  265 +-
 .../tika/metadata/PropertyTypeException.java       |    8 +-
 .../java/org/apache/tika/metadata/QuattroPro.java  |   66 +-
 .../java/org/apache/tika/metadata/RTFMetadata.java |   45 +-
 .../main/java/org/apache/tika/metadata/TIFF.java   |  100 +-
 .../apache/tika/metadata/TikaCoreProperties.java   |  337 ++-
 .../java/org/apache/tika/metadata/WordPerfect.java |  100 +-
 .../main/java/org/apache/tika/metadata/XMP.java    |    4 +-
 .../main/java/org/apache/tika/metadata/XMPDM.java  |  290 +--
 .../main/java/org/apache/tika/metadata/XMPIdq.java |    4 +-
 .../main/java/org/apache/tika/metadata/XMPMM.java  |   42 +-
 .../java/org/apache/tika/metadata/XMPRights.java   |   20 +-
 .../metadata/filter/ClearByMimeMetadataFilter.java |   13 +-
 .../metadata/filter/CompositeMetadataFilter.java   |    4 +-
 .../metadata/filter/DefaultMetadataFilter.java     |   20 +-
 .../filter/ExcludeFieldMetadataFilter.java         |   10 +-
 .../metadata/filter/FieldNameMappingFilter.java    |   25 +-
 .../filter/IncludeFieldMetadataFilter.java         |   11 +-
 .../tika/metadata/filter/MetadataFilter.java       |    4 +-
 .../main/java/org/apache/tika/mime/HexCoDec.java   |   49 +-
 .../src/main/java/org/apache/tika/mime/Magic.java  |    2 -
 .../main/java/org/apache/tika/mime/MagicMatch.java |   12 +-
 .../main/java/org/apache/tika/mime/MediaType.java  |  273 +--
 .../org/apache/tika/mime/MediaTypeRegistry.java    |   67 +-
 .../main/java/org/apache/tika/mime/MimeType.java   |  283 ++-
 .../org/apache/tika/mime/MimeTypeException.java    |    6 +-
 .../main/java/org/apache/tika/mime/MimeTypes.java  |  247 +-
 .../org/apache/tika/mime/MimeTypesFactory.java     |  108 +-
 .../java/org/apache/tika/mime/MimeTypesReader.java |  346 +--
 .../apache/tika/mime/MimeTypesReaderMetKeys.java   |    2 +-
 .../org/apache/tika/mime/MinShouldMatchClause.java |   11 +-
 .../main/java/org/apache/tika/mime/Patterns.java   |   99 +-
 .../mime/ProbabilisticMimeDetectionSelector.java   |   93 +-
 .../parser/AbstractEncodingDetectorParser.java     |    1 +
 .../org/apache/tika/parser/AbstractParser.java     |   11 +-
 .../org/apache/tika/parser/AutoDetectParser.java   |   33 +-
 .../tika/parser/AutoDetectParserFactory.java       |    9 +-
 .../org/apache/tika/parser/CompositeParser.java    |   93 +-
 .../java/org/apache/tika/parser/CryptoParser.java  |   26 +-
 .../java/org/apache/tika/parser/DefaultParser.java |   88 +-
 .../org/apache/tika/parser/DelegatingParser.java   |   13 +-
 .../org/apache/tika/parser/DigestingParser.java    |   63 +-
 .../java/org/apache/tika/parser/EmptyParser.java   |   20 +-
 .../java/org/apache/tika/parser/ErrorParser.java   |   12 +-
 .../java/org/apache/tika/parser/NetworkParser.java |   66 +-
 .../java/org/apache/tika/parser/ParseContext.java  |   67 +-
 .../main/java/org/apache/tika/parser/Parser.java   |   23 +-
 .../org/apache/tika/parser/ParserDecorator.java    |   84 +-
 .../java/org/apache/tika/parser/ParserFactory.java |    7 +-
 .../apache/tika/parser/ParserPostProcessor.java    |   11 +-
 .../java/org/apache/tika/parser/ParsingReader.java |  130 +-
 .../org/apache/tika/parser/PasswordProvider.java   |   11 +-
 .../apache/tika/parser/RecursiveParserWrapper.java |  177 +-
 .../org/apache/tika/parser/StatefulParser.java     |    2 +-
 .../tika/parser/digest/CompositeDigester.java      |    2 +-
 .../tika/parser/digest/InputStreamDigester.java    |   64 +-
 .../parser/external/CompositeExternalParser.java   |   25 +-
 .../tika/parser/external/ExternalParser.java       |  293 ++-
 .../external/ExternalParsersConfigReader.java      |  335 ++-
 .../ExternalParsersConfigReaderMetKeys.java        |   14 +-
 .../parser/external/ExternalParsersFactory.java    |   93 +-
 .../parser/multiple/AbstractMultipleParser.java    |  376 +--
 .../tika/parser/multiple/FallbackParser.java       |   36 +-
 .../tika/parser/multiple/SupplementingParser.java  |   50 +-
 .../apache/tika/pipes/emitter/AbstractEmitter.java |   38 +-
 .../org/apache/tika/pipes/emitter/EmitData.java    |   25 +-
 .../org/apache/tika/pipes/emitter/EmitKey.java     |   18 +-
 .../org/apache/tika/pipes/emitter/Emitter.java     |    4 +-
 .../apache/tika/pipes/emitter/EmitterManager.java  |   16 +-
 .../apache/tika/pipes/emitter/EmptyEmitter.java    |    7 +-
 .../apache/tika/pipes/emitter/StreamEmitter.java   |    4 +-
 .../apache/tika/pipes/fetcher/EmptyFetcher.java    |    6 +-
 .../org/apache/tika/pipes/fetcher/FetchKey.java    |   22 +-
 .../org/apache/tika/pipes/fetcher/Fetcher.java     |   10 +-
 .../apache/tika/pipes/fetcher/FetcherManager.java  |   18 +-
 .../tika/pipes/fetcher/FileSystemFetcher.java      |   50 +-
 .../pipes/fetchiterator/EmptyFetchIterator.java    |    6 -
 .../tika/pipes/fetchiterator/FetchEmitTuple.java   |   38 +-
 .../tika/pipes/fetchiterator/FetchIterator.java    |   24 +-
 .../fetchiterator/FileSystemFetchIterator.java     |   41 +-
 .../sax/AbstractRecursiveParserWrapperHandler.java |   53 +-
 .../tika/sax/BasicContentHandlerFactory.java       |   98 +-
 .../org/apache/tika/sax/BodyContentHandler.java    |   12 +-
 .../java/org/apache/tika/sax/CleanPhoneText.java   |  345 +--
 .../apache/tika/sax/ContentHandlerDecorator.java   |   18 +-
 .../org/apache/tika/sax/ContentHandlerFactory.java |   15 +-
 .../org/apache/tika/sax/DIFContentHandler.java     |  242 +-
 .../tika/sax/ElementMappingContentHandler.java     |   81 +-
 .../sax/EndDocumentShieldingContentHandler.java    |   16 +-
 .../tika/sax/ExpandedTitleContentHandler.java      |   22 +-
 .../src/main/java/org/apache/tika/sax/Link.java    |    4 +-
 .../main/java/org/apache/tika/sax/LinkBuilder.java |   12 +-
 .../org/apache/tika/sax/LinkContentHandler.java    |   30 +-
 .../tika/sax/PhoneExtractingContentHandler.java    |   20 +-
 .../tika/sax/RecursiveParserWrapperHandler.java    |   54 +-
 .../apache/tika/sax/RichTextContentHandler.java    |    3 +-
 .../org/apache/tika/sax/SafeContentHandler.java    |  155 +-
 .../org/apache/tika/sax/SecureContentHandler.java  |   86 +-
 .../org/apache/tika/sax/StandardOrganizations.java |  305 +--
 .../org/apache/tika/sax/StandardReference.java     |  201 +-
 .../sax/StandardsExtractingContentHandler.java     |  155 +-
 .../java/org/apache/tika/sax/StandardsText.java    |  277 +--
 .../org/apache/tika/sax/TaggedContentHandler.java  |    8 +-
 .../org/apache/tika/sax/TaggedSAXException.java    |    6 +-
 .../org/apache/tika/sax/TeeContentHandler.java     |   18 +-
 .../tika/sax/TextAndAttributeContentHandler.java   |   12 +-
 .../org/apache/tika/sax/TextContentHandler.java    |   12 +-
 .../org/apache/tika/sax/ToHTMLContentHandler.java  |   10 +-
 .../org/apache/tika/sax/ToTextContentHandler.java  |   30 +-
 .../org/apache/tika/sax/ToXMLContentHandler.java   |  120 +-
 .../apache/tika/sax/WriteOutContentHandler.java    |   52 +-
 .../org/apache/tika/sax/XHTMLContentHandler.java   |  103 +-
 .../org/apache/tika/sax/XMPContentHandler.java     |   27 +-
 .../apache/tika/sax/xpath/CompositeMatcher.java    |    3 +-
 .../java/org/apache/tika/sax/xpath/Matcher.java    |    4 +-
 .../tika/sax/xpath/MatchingContentHandler.java     |   21 +-
 .../org/apache/tika/sax/xpath/XPathParser.java     |   10 +-
 .../org/apache/tika/utils/AnnotationUtils.java     |   61 +-
 .../java/org/apache/tika/utils/CharsetUtils.java   |   97 +-
 .../java/org/apache/tika/utils/CompareUtils.java   |   10 +-
 .../org/apache/tika/utils/ConcurrentUtils.java     |  112 +-
 .../main/java/org/apache/tika/utils/DateUtils.java |   90 +-
 .../java/org/apache/tika/utils/ExceptionUtils.java |    5 +-
 .../java/org/apache/tika/utils/ParserUtils.java    |   60 +-
 .../java/org/apache/tika/utils/ProcessUtils.java   |    2 +-
 .../java/org/apache/tika/utils/RegexUtils.java     |   20 +-
 .../apache/tika/utils/RereadableInputStream.java   |  261 +-
 .../org/apache/tika/utils/ServiceLoaderUtils.java  |   12 +-
 .../java/org/apache/tika/utils/StringUtils.java    |   26 +-
 .../java/org/apache/tika/utils/SystemUtils.java    |   15 +-
 .../java/org/apache/tika/utils/XMLReaderUtils.java |  545 +++--
 .../org/apache/custom/detect/MyCustomDetector.java |    6 +-
 .../org/apache/tika/MultiThreadedTikaTest.java     |  332 +--
 .../apache/tika/ResourceLoggingClassLoader.java    |   24 +-
 .../org/apache/tika/TestRereadableInputStream.java |  144 +-
 .../java/org/apache/tika/TikaDetectionTest.java    |   51 +-
 .../src/test/java/org/apache/tika/TikaIT.java      |    5 +-
 .../src/test/java/org/apache/tika/TikaTest.java    |  444 ++--
 .../org/apache/tika/TypeDetectionBenchmark.java    |   18 +-
 .../apache/tika/config/AbstractTikaConfigTest.java |   14 +-
 .../java/org/apache/tika/config/DummyExecutor.java |   59 +-
 .../java/org/apache/tika/config/DummyParser.java   |    8 +-
 .../java/org/apache/tika/config/ParamTest.java     |   37 +-
 .../tika/config/TikaConfigSerializerTest.java      |   24 +-
 .../org/apache/tika/config/TikaConfigTest.java     |  163 +-
 .../tika/detect/FileCommandDetectorTest.java       |   40 +-
 .../org/apache/tika/detect/MagicDetectorTest.java  |  143 +-
 .../tika/detect/MimeDetectionWithNNTest.java       |  213 +-
 .../org/apache/tika/detect/NameDetectorTest.java   |   25 +-
 .../org/apache/tika/detect/TextDetectorTest.java   |   31 +-
 .../org/apache/tika/detect/TypeDetectorTest.java   |   31 +-
 .../tika/detect/ZeroSizeFileDetectorTest.java      |    5 +-
 .../java/org/apache/tika/fork/ForkParserTest.java  |  123 +-
 .../apache/tika/fork/ForkParserTikaBinTest.java    |  135 +-
 .../java/org/apache/tika/fork/ForkTestParser.java  |   21 +-
 .../tika/fork/UpperCasingContentHandler.java       |    7 +-
 .../java/org/apache/tika/io/EndianUtilsTest.java   |   38 +-
 .../java/org/apache/tika/io/FilenameUtilsTest.java |   39 +-
 .../apache/tika/io/LookaheadInputStreamTest.java   |   20 +-
 .../java/org/apache/tika/io/TailStreamTest.java    |   87 +-
 .../org/apache/tika/io/TemporaryResourcesTest.java |    6 +-
 .../org/apache/tika/io/TikaInputStreamTest.java    |   56 +-
 .../tika/language/LanguageIdentifierTest.java      |   44 +-
 .../apache/tika/language/LanguageProfileTest.java  |    2 +-
 .../tika/language/LanguageProfilerBuilderTest.java |   32 +-
 .../tika/language/detect/LanguageNamesTest.java    |   26 +-
 .../org/apache/tika/metadata/TestMetadata.java     |  212 +-
 .../tika/metadata/filter/MockUpperCaseFilter.java  |    4 +-
 .../tika/metadata/filter/TestMetadataFilter.java   |   31 +-
 .../org/apache/tika/mime/CustomReaderTest.java     |  120 +-
 .../java/org/apache/tika/mime/MediaTypeTest.java   |   86 +-
 .../org/apache/tika/mime/MimeDetectionTest.java    |  135 +-
 .../org/apache/tika/mime/MimeTypesReaderTest.java  |  297 ++-
 .../java/org/apache/tika/mime/PatternsTest.java    |   18 +-
 .../tika/mime/ProbabilisticMimeDetectionTest.java  |  114 +-
 .../ProbabilisticMimeDetectionTestWithTika.java    |  100 +-
 .../apache/tika/parser/CompositeParserTest.java    |  158 +-
 .../tika/parser/DummyInitializableParser.java      |   29 +-
 .../tika/parser/DummyParameterizedParser.java      |   71 +-
 .../java/org/apache/tika/parser/DummyParser.java   |   57 +-
 .../tika/parser/InitializableParserTest.java       |   14 +-
 .../tika/parser/ParameterizedParserTest.java       |   35 +-
 .../apache/tika/parser/ParserDecoratorTest.java    |   57 +-
 .../org/apache/tika/parser/mock/MockParser.java    |   72 +-
 .../apache/tika/parser/mock/MockParserFactory.java |    8 +-
 .../org/apache/tika/parser/mock/VowelParser.java   |   11 +-
 .../tika/parser/multiple/MultipleParserTest.java   |  137 +-
 .../org/apache/tika/pipes/emitter/MockEmitter.java |    8 +-
 .../tika/pipes/fetcher/FileSystemFetcherTest.java  |   10 +-
 .../fetchiterator/FileSystemFetchIteratorTest.java |   25 +-
 .../tika/sax/BasicContentHandlerFactoryTest.java   |   98 +-
 .../apache/tika/sax/BodyContentHandlerTest.java    |    9 +-
 .../apache/tika/sax/LinkContentHandlerTest.java    |   29 +-
 .../apache/tika/sax/OfflineContentHandlerTest.java |    9 +-
 .../tika/sax/RichTextContentHandlerTest.java       |   15 +-
 .../apache/tika/sax/SecureContentHandlerTest.java  |    7 +-
 .../java/org/apache/tika/sax/SerializerTest.java   |   55 +-
 .../apache/tika/sax/XHTMLContentHandlerTest.java   |   77 +-
 .../org/apache/tika/utils/AnnotationUtilsTest.java |   47 +-
 .../org/apache/tika/utils/CharsetUtilsTest.java    |   14 +-
 .../org/apache/tika/utils/ConcurrentUtilsTest.java |  126 +-
 .../java/org/apache/tika/utils/RegexUtilsTest.java |   31 +-
 .../apache/tika/utils/ServiceLoaderUtilsTest.java  |   28 +-
 tika-core/src/test/resources/log4j.properties      |    1 +
 .../org/apache/tika/config/FileCommandDetector.xml |    2 +-
 .../org/apache/tika/config/TIKA-1762-executors.xml |   62 +-
 .../apache/tika/fuzzing/general/ByteFlipper.java   |    2 +-
 tika-parent/checkstyle.xml                         |  139 ++
 tika-parent/pom.xml                                |   42 +-
 tika-parsers/pom.xml                               |   38 +-
 .../tika/parser/recognition/AgeRecogniser.java     |  182 +-
 .../parser/recognition/AgeRecogniserConfig.java    |   59 +-
 .../tika/parser/recognition/AgeRecogniserTest.java |   53 +-
 .../tika/dl/imagerec/DL4JInceptionV3Net.java       |  138 +-
 .../org/apache/tika/dl/imagerec/DL4JVGG16Net.java  |   73 +-
 .../tika/dl/imagerec/DL4JInceptionV3NetTest.java   |   14 +-
 .../apache/tika/dl/imagerec/DL4JVGG16NetTest.java  |   15 +-
 .../tika/parser/captioning/CaptionObject.java      |    6 +-
 .../captioning/tf/TensorflowRESTCaptioner.java     |   44 +-
 .../tika/parser/pot/PooledTimeSeriesParser.java    |   85 +-
 .../tika/parser/recognition/ObjectRecogniser.java  |   36 +-
 .../recognition/ObjectRecognitionParser.java       |   50 +-
 .../tika/parser/recognition/RecognisedObject.java  |    7 +-
 .../recognition/tf/TensorflowImageRecParser.java   |   97 +-
 .../recognition/tf/TensorflowRESTRecogniser.java   |   54 +-
 .../tf/TensorflowRESTVideoRecogniser.java          |   30 +-
 .../tika/parser/captioning/tf/model_info.xml       |    3 +-
 .../recognition/tika-config-tflow-video-rest.xml   |    3 +-
 .../recognition/ObjectRecognitionParserTest.java   |   96 +-
 .../tf/TensorflowImageRecParserTest.java           |   28 +-
 .../tf/TensorflowVideoRecParserTest.java           |   30 +-
 .../parser/ctakes/CTAKESAnnotationProperty.java    |   16 +-
 .../apache/tika/parser/ctakes/CTAKESConfig.java    |  249 +-
 .../tika/parser/ctakes/CTAKESContentHandler.java   |  178 +-
 .../apache/tika/parser/ctakes/CTAKESParser.java    |   42 +-
 .../tika/parser/ctakes/CTAKESSerializer.java       |    5 +-
 .../org/apache/tika/parser/ctakes/CTAKESUtils.java |  423 ++--
 .../java/org/apache/tika/parser/geo/GeoParser.java |   85 +-
 .../apache/tika/parser/geo/GeoParserConfig.java    |   33 +-
 .../java/org/apache/tika/parser/geo/GeoTag.java    |   73 +-
 .../tika/parser/geo/NameEntityExtractor.java       |   23 +-
 .../parser/geo/gazetteer/GeoGazetteerClient.java   |  157 +-
 .../apache/tika/parser/geo/gazetteer/Location.java |  107 +-
 .../tika/parser/journal/GrobidRESTParser.java      |   62 +-
 .../apache/tika/parser/journal/JournalParser.java  |   44 +-
 .../apache/tika/parser/journal/TEIDOMParser.java   |  159 +-
 .../org/apache/tika/parser/ner/NERecogniser.java   |    8 +-
 .../apache/tika/parser/ner/NamedEntityParser.java  |   76 +-
 .../parser/ner/corenlp/CoreNLPNERecogniser.java    |   93 +-
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |  208 +-
 .../tika/parser/ner/mitie/MITIENERecogniser.java   |  115 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |   69 +-
 .../parser/ner/opennlp/OpenNLPNERecogniser.java    |   42 +-
 .../tika/parser/ner/opennlp/OpenNLPNameFinder.java |   37 +-
 .../tika/parser/ner/regex/RegexNERecogniser.java   |   31 +-
 .../parser/sentiment/SentimentAnalysisParser.java  |   33 +-
 .../tika/parser/ctakes/CTAKESConfig.properties     |    2 +-
 .../tika/parser/geo/GeoTopicConfig.properties      |    2 +-
 .../tika/parser/journal/GrobidExtractor.properties |    2 +-
 .../tika/parser/ner/grobid/GrobidServer.properties |    4 +-
 .../tika/parser/ner/nltk/NLTKServer.properties     |    2 +-
 .../org/apache/tika/parser/geo/GeoParserTest.java  |  147 +-
 .../tika/parser/journal/JournalParserTest.java     |    5 +-
 .../org/apache/tika/parser/journal/TEITest.java    |   32 +-
 .../tika/parser/ner/NamedEntityParserTest.java     |   29 +-
 .../tika/parser/ner/nltk/NLTKNERecogniserTest.java |   20 +-
 .../parser/ner/regex/RegexNERecogniserTest.java    |   18 +-
 .../sentiment/SentimentAnalysisParserTest.java     |   49 +-
 .../tika/config/TIKA-3078-geo.topic.GeoParser.xml  |   22 +-
 tika-parsers/tika-parsers-classic/pom.xml          |   89 +-
 .../apache/tika/detect/apple/BPListDetector.java   |   66 +-
 .../apache/tika/detect/apple/IWorkDetector.java    |   18 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |   60 +-
 .../org/apache/tika/parser/apple/PListParser.java  |   92 +-
 .../tika/parser/iwork/AutoPageNumberUtils.java     |  146 +-
 .../tika/parser/iwork/IWorkPackageParser.java      |  271 ++-
 .../tika/parser/iwork/KeynoteContentHandler.java   |   36 +-
 .../tika/parser/iwork/NumbersContentHandler.java   |   16 +-
 .../tika/parser/iwork/PagesContentHandler.java     |  436 ++--
 .../parser/iwork/iwana/IWork13PackageParser.java   |  198 +-
 .../parser/iwork/iwana/IWork18PackageParser.java   |  180 +-
 .../apache/tika/parser/apple/PListParserTest.java  |   11 +-
 .../tika/parser/iwork/AutoPageNumberUtilsTest.java |   85 +-
 .../apache/tika/parser/iwork/IWorkParserTest.java  |  141 +-
 .../tika/parser/iwork/iwana/IWork13ParserTest.java |   23 +-
 .../org/apache/tika/parser/audio/AudioParser.java  |   32 +-
 .../org/apache/tika/parser/audio/MidiParser.java   |   41 +-
 .../org/apache/tika/parser/mp3/AudioFrame.java     |  239 +-
 .../java/org/apache/tika/parser/mp3/ID3Tags.java   |  294 +--
 .../org/apache/tika/parser/mp3/ID3v1Handler.java   |  103 +-
 .../org/apache/tika/parser/mp3/ID3v22Handler.java  |   71 +-
 .../org/apache/tika/parser/mp3/ID3v23Handler.java  |   31 +-
 .../org/apache/tika/parser/mp3/ID3v24Handler.java  |   35 +-
 .../org/apache/tika/parser/mp3/ID3v2Frame.java     |  418 ++--
 .../org/apache/tika/parser/mp3/LyricsHandler.java  |   82 +-
 .../java/org/apache/tika/parser/mp3/MP3Frame.java  |    2 +-
 .../java/org/apache/tika/parser/mp3/Mp3Parser.java |  210 +-
 .../org/apache/tika/parser/mp3/MpegStream.java     |  445 ++--
 .../apache/tika/parser/mp4/ISO6709Extractor.java   |   26 +-
 .../java/org/apache/tika/parser/mp4/MP4Parser.java |  172 +-
 .../org/apache/tika/parser/video/FLVParser.java    |   81 +-
 .../apache/tika/parser/audio/AudioParserTest.java  |   15 +-
 .../apache/tika/parser/audio/MidiParserTest.java   |    9 +-
 .../org/apache/tika/parser/mp3/Mp3ParserTest.java  |  151 +-
 .../org/apache/tika/parser/mp3/MpegStreamTest.java |   93 +-
 .../org/apache/tika/parser/mp4/MP4ParserTest.java  |   30 +-
 .../apache/tika/parser/video/FLVParserTest.java    |    7 +-
 .../java/org/apache/tika/parser/dwg/DWGParser.java |  336 ++-
 .../java/org/apache/tika/parser/prt/PRTParser.java |  413 ++--
 .../org/apache/tika/parser/dwg/DWGParserTest.java  |   99 +-
 .../org/apache/tika/parser/prt/PRTParserTest.java  |  135 +-
 .../org/apache/tika/parser/asm/ClassParser.java    |   17 +-
 .../apache/tika/parser/asm/XHTMLClassVisitor.java  |   45 +-
 .../apache/tika/parser/code/SourceCodeParser.java  |   36 +-
 .../tika/parser/executable/ExecutableParser.java   |  656 ++---
 .../java/org/apache/tika/parser/mat/MatParser.java |   44 +-
 .../org/apache/tika/parser/sas/SAS7BDATParser.java |   60 +-
 .../apache/tika/parser/asm/ClassParserTest.java    |   28 +-
 .../tika/parser/code/SourceCodeParserTest.java     |   56 +-
 .../parser/executable/ExecutableParserTest.java    |   32 +-
 .../org/apache/tika/parser/mat/MatParserTest.java  |    3 +-
 .../apache/tika/parser/sas/SAS7BDATParserTest.java |   37 +-
 .../org/apache/tika/parser/crypto/Pkcs7Parser.java |   41 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |  257 +-
 .../apache/tika/parser/crypto/Pkcs7ParserTest.java |    3 +-
 .../apache/tika/parser/crypto/TSDParserTest.java   |   16 +-
 .../parser/digestutils/BouncyCastleDigester.java   |   13 +-
 .../tika/parser/digestutils/CommonsDigester.java   |   69 +-
 .../tika/parser/font/AdobeFontMetricParser.java    |  167 +-
 .../apache/tika/parser/font/TrueTypeParser.java    |   27 +-
 .../apache/tika/parser/font/FontParsersTest.java   |   28 +-
 .../sax/boilerpipe/BoilerpipeContentHandler.java   |   46 +-
 .../org/apache/tika/parser/html/DataURIScheme.java |   13 +-
 .../parser/html/DataURISchemeParseException.java   |    4 +-
 .../apache/tika/parser/html/DataURISchemeUtil.java |   14 +-
 .../apache/tika/parser/html/DefaultHtmlMapper.java |  122 +-
 .../tika/parser/html/HtmlEncodingDetector.java     |   66 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |  104 +-
 .../org/apache/tika/parser/html/HtmlParser.java    |   83 +-
 .../tika/parser/html/XHTMLDowngradeHandler.java    |   20 +-
 .../html/charsetdetector/CharsetAliases.java       |   55 +-
 .../charsetdetector/CharsetDetectionResult.java    |   12 +-
 .../parser/html/charsetdetector/MetaProcessor.java |   18 +-
 .../parser/html/charsetdetector/PreScanner.java    |   83 +-
 .../StandardHtmlEncodingDetector.java              |   28 +-
 .../charsets/XUserDefinedCharset.java              |    8 +-
 .../tika/parser/html/DataURISchemeParserTest.java  |   19 +-
 .../tika/parser/html/HtmlEncodingDetectorTest.java |   60 +-
 .../apache/tika/parser/html/HtmlParserTest.java    |  705 +++---
 .../html/StandardHtmlEncodingDetectorTest.java     |  139 +-
 .../tika/parser/image/AbstractImageParser.java     |   46 +-
 .../org/apache/tika/parser/image/BPGParser.java    |   30 +-
 .../org/apache/tika/parser/image/HeifParser.java   |   33 +-
 .../org/apache/tika/parser/image/ICNSParser.java   |   55 +-
 .../org/apache/tika/parser/image/ICNSType.java     |  241 +-
 .../tika/parser/image/ImageMetadataExtractor.java  |  159 +-
 .../org/apache/tika/parser/image/ImageParser.java  |   63 +-
 .../org/apache/tika/parser/image/JpegParser.java   |   12 +-
 .../apache/tika/parser/image/MetadataFields.java   |    5 +-
 .../org/apache/tika/parser/image/PSDParser.java    |   43 +-
 .../org/apache/tika/parser/image/TiffParser.java   |   11 +-
 .../org/apache/tika/parser/image/WebPParser.java   |   11 +-
 .../apache/tika/parser/image/HeifParserTest.java   |   16 +-
 .../apache/tika/parser/image/ICNSParserTest.java   |   33 +-
 .../parser/image/ImageMetadataExtractorTest.java   |   17 +-
 .../apache/tika/parser/image/ImageParserTest.java  |  110 +-
 .../apache/tika/parser/image/JpegParserTest.java   |   76 +-
 .../apache/tika/parser/image/PSDParserTest.java    |   13 +-
 .../apache/tika/parser/image/WebPParserTest.java   |    3 +-
 .../apache/tika/parser/jdbc/AbstractDBParser.java  |   33 +-
 .../apache/tika/parser/jdbc/JDBCTableReader.java   |   79 +-
 .../apache/tika/parser/mailcommons/MailUtil.java   |    9 +-
 .../tika/parser/mailcommons/MailUtilTest.java      |   11 +-
 .../tika/parser/mail/MailContentHandler.java       |  207 +-
 .../org/apache/tika/parser/mail/RFC822Parser.java  |   30 +-
 .../org/apache/tika/parser/mbox/MboxParser.java    |   36 +-
 .../apache/tika/parser/mail/RFC822ParserTest.java  |  175 +-
 .../apache/tika/parser/mbox/MboxParserTest.java    |   25 +-
 .../detect/microsoft/POIFSContainerDetector.java   |   87 +-
 .../detect/microsoft/ooxml/OPCPackageDetector.java |  224 +-
 .../microsoft/MSEmbeddedStreamTranslator.java      |   24 +-
 .../tika/parser/microsoft/AbstractListManager.java |   23 +-
 .../parser/microsoft/AbstractOfficeParser.java     |   61 +-
 .../parser/microsoft/AbstractPOIFSExtractor.java   |   57 +-
 .../org/apache/tika/parser/microsoft/Cell.java     |    3 +-
 .../tika/parser/microsoft/CellDecorator.java       |    3 +-
 .../apache/tika/parser/microsoft/EMFParser.java    |   78 +-
 .../tika/parser/microsoft/ExcelExtractor.java      |  142 +-
 .../tika/parser/microsoft/FormattingUtils.java     |   23 +-
 .../tika/parser/microsoft/HSLFExtractor.java       |  124 +-
 .../tika/parser/microsoft/JackcessExtractor.java   |   81 +-
 .../tika/parser/microsoft/JackcessParser.java      |   33 +-
 .../apache/tika/parser/microsoft/LinkedCell.java   |    3 +-
 .../apache/tika/parser/microsoft/ListManager.java  |   33 +-
 .../tika/parser/microsoft/MSOwnerFileParser.java   |   37 +-
 .../apache/tika/parser/microsoft/NumberCell.java   |    3 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |  146 +-
 .../tika/parser/microsoft/OfficeParserConfig.java  |   69 +-
 .../tika/parser/microsoft/OldExcelParser.java      |   23 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |  361 ++-
 .../tika/parser/microsoft/SummaryExtractor.java    |   68 +-
 .../apache/tika/parser/microsoft/TNEFParser.java   |   44 +-
 .../org/apache/tika/parser/microsoft/TextCell.java |    3 +-
 .../parser/microsoft/TikaExcelDataFormatter.java   |   11 +-
 .../parser/microsoft/TikaExcelGeneralFormat.java   |    2 +-
 .../apache/tika/parser/microsoft/WMFParser.java    |   24 +-
 .../tika/parser/microsoft/WordExtractor.java       |   98 +-
 .../tika/parser/microsoft/chm/ChmAccessor.java     |   10 +-
 .../tika/parser/microsoft/chm/ChmAssert.java       |  139 +-
 .../tika/parser/microsoft/chm/ChmBlockInfo.java    |  103 +-
 .../tika/parser/microsoft/chm/ChmCommons.java      |  293 +--
 .../tika/parser/microsoft/chm/ChmConstants.java    |   54 +-
 .../microsoft/chm/ChmDirectoryListingSet.java      |  234 +-
 .../tika/parser/microsoft/chm/ChmExtractor.java    |  284 +--
 .../tika/parser/microsoft/chm/ChmItsfHeader.java   |  192 +-
 .../tika/parser/microsoft/chm/ChmItspHeader.java   |  271 +--
 .../tika/parser/microsoft/chm/ChmLzxBlock.java     |  455 ++--
 .../tika/parser/microsoft/chm/ChmLzxState.java     |  262 +-
 .../parser/microsoft/chm/ChmLzxcControlData.java   |  147 +-
 .../parser/microsoft/chm/ChmLzxcResetTable.java    |  129 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   39 +-
 .../tika/parser/microsoft/chm/ChmPmgiHeader.java   |   51 +-
 .../tika/parser/microsoft/chm/ChmPmglHeader.java   |   62 +-
 .../tika/parser/microsoft/chm/ChmSection.java      |   61 +-
 .../tika/parser/microsoft/chm/ChmWrapper.java      |   12 +-
 .../microsoft/chm/DirectoryListingEntry.java       |   41 +-
 .../tika/parser/microsoft/onenote/CompactID.java   |    9 +-
 .../tika/parser/microsoft/onenote/Error.java       |   11 +-
 .../parser/microsoft/onenote/ExtendedGUID.java     |   11 +-
 .../microsoft/onenote/FileChunkReference.java      |   20 +-
 .../tika/parser/microsoft/onenote/FileNode.java    |  102 +-
 .../microsoft/onenote/FileNodeListHeader.java      |   32 +-
 .../tika/parser/microsoft/onenote/FileNodePtr.java |    3 +-
 .../parser/microsoft/onenote/FileNodeUnion.java    |   33 +-
 .../microsoft/onenote/FndStructureConstants.java   |   44 +-
 .../apache/tika/parser/microsoft/onenote/GUID.java |   65 +-
 .../apache/tika/parser/microsoft/onenote/JCID.java |   38 +-
 .../microsoft/onenote/JCIDPropertySetTypeEnum.java |   76 +-
 .../onenote/ObjectDeclarationWithRefCount.java     |   27 +-
 .../onenote/ObjectDeclarationWithRefCountBody.java |    3 +-
 .../onenote/ObjectSpaceObjectPropSet.java          |   12 +-
 ...ctSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java |    6 +-
 .../onenote/OneNoteDirectFileResource.java         |    9 +-
 .../parser/microsoft/onenote/OneNoteDocument.java  |   13 +-
 .../parser/microsoft/onenote/OneNoteHeader.java    |   18 +-
 .../onenote/OneNoteLegacyDumpStrings.java          |   47 +-
 .../parser/microsoft/onenote/OneNoteParser.java    |  149 +-
 .../microsoft/onenote/OneNotePropertyEnum.java     |  208 +-
 .../microsoft/onenote/OneNotePropertyId.java       |    7 +-
 .../tika/parser/microsoft/onenote/OneNotePtr.java  |  515 ++--
 .../microsoft/onenote/OneNoteTreeWalker.java       |  215 +-
 .../onenote/OneNoteTreeWalkerOptions.java          |   14 +-
 .../parser/microsoft/onenote/PropertyIDType.java   |    7 +-
 .../tika/parser/microsoft/onenote/PropertySet.java |   37 +-
 .../parser/microsoft/onenote/PropertyValue.java    |   20 +-
 .../tika/parser/microsoft/onenote/Revision.java    |   23 +-
 .../microsoft/onenote/RootObjectReference.java     |    3 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |  168 +-
 .../parser/microsoft/ooxml/MetadataExtractor.java  |   79 +-
 .../parser/microsoft/ooxml/OOXMLExtractor.java     |   10 +-
 .../microsoft/ooxml/OOXMLExtractorFactory.java     |   97 +-
 .../tika/parser/microsoft/ooxml/OOXMLParser.java   |   60 +-
 .../microsoft/ooxml/OOXMLTikaBodyPartHandler.java  |   72 +-
 .../ooxml/OOXMLWordAndPowerPointTextHandler.java   |  110 +-
 .../ooxml/POIXMLTextExtractorDecorator.java        |    8 +-
 .../microsoft/ooxml/ParagraphProperties.java       |   18 +-
 .../tika/parser/microsoft/ooxml/RunProperties.java |    9 +-
 .../ooxml/SXSLFPowerPointExtractorDecorator.java   |  258 +-
 .../ooxml/SXWPFWordExtractorDecorator.java         |   93 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java    |   95 +-
 .../ooxml/XSSFBExcelExtractorDecorator.java        |   48 +-
 .../ooxml/XSSFExcelExtractorDecorator.java         |  181 +-
 .../parser/microsoft/ooxml/XWPFListManager.java    |   21 +-
 .../ooxml/XWPFWordExtractorDecorator.java          |  124 +-
 .../microsoft/ooxml/xps/XPSExtractorDecorator.java |  122 +-
 .../microsoft/ooxml/xps/XPSPageContentHandler.java |   98 +-
 .../microsoft/ooxml/xps/XPSTextExtractor.java      |    7 +-
 .../xslf/XSLFEventBasedPowerPointExtractor.java    |   16 +-
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    |   63 +-
 .../microsoft/ooxml/xwpf/XWPFStylesShim.java       |   28 +-
 .../ooxml/xwpf/ml2006/AbstractPartHandler.java     |   11 +-
 .../ooxml/xwpf/ml2006/BinaryDataHandler.java       |   12 +-
 .../ooxml/xwpf/ml2006/CorePropertiesHandler.java   |   17 +-
 .../xwpf/ml2006/ExtendedPropertiesHandler.java     |    3 +-
 .../microsoft/ooxml/xwpf/ml2006/PartHandler.java   |    7 +-
 .../ooxml/xwpf/ml2006/RelationshipsHandler.java    |    5 +-
 .../ooxml/xwpf/ml2006/RelationshipsManager.java    |    3 +-
 .../ooxml/xwpf/ml2006/Word2006MLDocHandler.java    |   40 +-
 .../ooxml/xwpf/ml2006/Word2006MLParser.java        |   21 +-
 .../ml2006/WordAndPowerPointTextPartHandler.java   |   17 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |   62 +-
 .../parser/microsoft/rtf/RTFEmbObjHandler.java     |   40 +-
 .../parser/microsoft/rtf/RTFObjDataParser.java     |   62 +-
 .../tika/parser/microsoft/rtf/RTFParser.java       |   53 +-
 .../tika/parser/microsoft/rtf/TextExtractor.java   |   93 +-
 .../microsoft/xml/AbstractXML2003Parser.java       |   41 +-
 .../parser/microsoft/xml/HyperlinkHandler.java     |   23 +-
 .../parser/microsoft/xml/SpreadsheetMLParser.java  |   50 +-
 .../tika/parser/microsoft/xml/WordMLParser.java    |   75 +-
 .../AbstractPOIContainerExtractionTest.java        |   16 +-
 .../tika/parser/microsoft/EMFParserTest.java       |   13 +-
 .../tika/parser/microsoft/ExcelParserTest.java     |   69 +-
 .../tika/parser/microsoft/JackcessParserTest.java  |   40 +-
 .../parser/microsoft/MSOwnerFileParserTest.java    |    7 +-
 .../tika/parser/microsoft/OfficeParserTest.java    |    4 +-
 .../tika/parser/microsoft/OldExcelParserTest.java  |   13 +-
 .../tika/parser/microsoft/OutlookParserTest.java   |   83 +-
 .../microsoft/POIContainerExtractionTest.java      |   19 +-
 .../parser/microsoft/PowerPointParserTest.java     |   47 +-
 .../tika/parser/microsoft/ProjectParserTest.java   |   27 +-
 .../tika/parser/microsoft/PublisherParserTest.java |   13 +-
 .../parser/microsoft/SolidworksParserTest.java     |   46 +-
 .../tika/parser/microsoft/TNEFParserTest.java      |    9 +-
 .../tika/parser/microsoft/VisioParserTest.java     |   13 +-
 .../tika/parser/microsoft/WMFParserTest.java       |    9 +-
 .../tika/parser/microsoft/WordParserTest.java      |  123 +-
 .../parser/microsoft/WriteProtectedParserTest.java |    9 +-
 .../parser/microsoft/chm/TestChmBlockInfo.java     |   50 +-
 .../parser/microsoft/chm/TestChmExtraction.java    |  161 +-
 .../parser/microsoft/chm/TestChmExtractor.java     |   16 +-
 .../parser/microsoft/chm/TestChmItsfHeader.java    |   40 +-
 .../parser/microsoft/chm/TestChmItspHeader.java    |   60 +-
 .../tika/parser/microsoft/chm/TestChmLzxState.java |   37 +-
 .../microsoft/chm/TestChmLzxcControlData.java      |   54 +-
 .../microsoft/chm/TestChmLzxcResetTable.java       |   59 +-
 .../microsoft/chm/TestDirectoryListingEntry.java   |    9 +-
 .../tika/parser/microsoft/chm/TestParameters.java  |   34 +-
 .../tika/parser/microsoft/chm/TestPmglHeader.java  |   24 +-
 .../microsoft/onenote/OneNoteParserTest.java       |   83 +-
 .../ooxml/OOXMLContainerExtractionTest.java        |   24 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |  274 +--
 .../parser/microsoft/ooxml/SXSLFExtractorTest.java |  122 +-
 .../parser/microsoft/ooxml/SXWPFExtractorTest.java |   97 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |   41 +-
 .../parser/microsoft/ooxml/xps/XPSParserTest.java  |   68 +-
 .../ooxml/xwpf/ml2006/Word2006MLParserTest.java    |   29 +-
 .../parser/microsoft/pst/OutlookPSTParserTest.java |   84 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |   91 +-
 .../parser/microsoft/xml/XML2003ParserTest.java    |   35 +-
 .../test-documents/testXPSWithDataDescriptor.xps   |  Bin 0 -> 44523 bytes
 .../test-documents/testXPSWithDataDescriptor2.xps  |  Bin 0 -> 51175 bytes
 .../apache/tika/detect/ole/MiscOLEDetector.java    |   69 +-
 .../java/org/apache/tika/parser/dbf/DBFCell.java   |   30 +-
 .../apache/tika/parser/dbf/DBFColumnHeader.java    |   68 +-
 .../org/apache/tika/parser/dbf/DBFFileHeader.java  |   46 +-
 .../java/org/apache/tika/parser/dbf/DBFParser.java |   34 +-
 .../java/org/apache/tika/parser/dbf/DBFReader.java |  167 +-
 .../java/org/apache/tika/parser/dbf/DBFRow.java    |   16 +-
 .../apache/tika/parser/dif/DIFContentHandler.java  |  241 +-
 .../java/org/apache/tika/parser/dif/DIFParser.java |   85 +-
 .../apache/tika/parser/epub/EpubContentParser.java |   29 +-
 .../org/apache/tika/parser/epub/EpubParser.java    |  124 +-
 .../apache/tika/parser/hwp/HwpStreamReader.java    |    2 +-
 .../apache/tika/parser/hwp/HwpTextExtractorV5.java |  111 +-
 .../org/apache/tika/parser/hwp/HwpV5Parser.java    |   16 +-
 .../apache/tika/parser/mif/MIFContentHandler.java  |   17 +-
 .../org/apache/tika/parser/mif/MIFExtractor.java   |   34 +-
 .../java/org/apache/tika/parser/mif/MIFParser.java |   42 +-
 .../parser/odf/FlatOpenDocumentMacroHandler.java   |   43 +-
 .../tika/parser/odf/FlatOpenDocumentParser.java    |  101 +-
 .../parser/odf/NSNormalizerContentHandler.java     |   29 +-
 .../tika/parser/odf/OpenDocumentBodyHandler.java   |  311 +--
 .../tika/parser/odf/OpenDocumentContentParser.java |   40 +-
 .../tika/parser/odf/OpenDocumentMacroHandler.java  |   16 +-
 .../parser/odf/OpenDocumentManifestHandler.java    |   35 +-
 .../tika/parser/odf/OpenDocumentMetaParser.java    |  101 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |  156 +-
 .../tika/parser/wordperfect/QPWTextExtractor.java  |  251 +-
 .../tika/parser/wordperfect/QuattroProParser.java  |   34 +-
 .../tika/parser/wordperfect/WP5Charsets.java       |  289 ++-
 .../wordperfect/WP5DocumentAreaExtractor.java      |   66 +-
 .../tika/parser/wordperfect/WP6Charsets.java       |  750 +++---
 .../wordperfect/WP6DocumentAreaExtractor.java      |   58 +-
 .../wordperfect/WPDocumentAreaExtractor.java       |   23 +-
 .../tika/parser/wordperfect/WPInputStream.java     |   25 +-
 .../tika/parser/wordperfect/WPPrefixArea.java      |   37 +-
 .../parser/wordperfect/WPPrefixAreaExtractor.java  |   10 +-
 .../tika/parser/wordperfect/WordPerfectParser.java |   78 +-
 .../org/apache/tika/parser/dbf/DBFParserTest.java  |   36 +-
 .../org/apache/tika/parser/dif/DIFParserTest.java  |   25 +-
 .../apache/tika/parser/epub/EpubParserTest.java    |   30 +-
 .../apache/tika/parser/hwp/HwpV5ParserTest.java    |   17 +-
 .../tika/parser/ibooks/iBooksParserTest.java       |   18 +-
 .../org/apache/tika/parser/mif/MIFParserTest.java  |    9 +-
 .../org/apache/tika/parser/odf/ODFParserTest.java  |  265 +-
 .../tika/parser/wordperfect/QuattroProTest.java    |   12 +-
 .../tika/parser/wordperfect/WPInputStreamTest.java |   14 +-
 .../tika/parser/wordperfect/WordPerfectTest.java   |   20 +-
 .../resources/test-documents/testODTEncrypted.odt  |  Bin 0 -> 12714 bytes
 .../org/apache/tika/parser/feed/FeedParser.java    |   76 +-
 .../apache/tika/parser/iptc/IptcAnpaParser.java    | 1404 +++++------
 .../apache/tika/parser/feed/FeedParserTest.java    |   23 +-
 .../apache/tika/parser/ocr/ImagePreprocessor.java  |   67 +-
 .../apache/tika/parser/ocr/TesseractOCRConfig.java |  197 +-
 .../apache/tika/parser/ocr/TesseractOCRParser.java |  364 ++-
 .../apache/tika/parser/ocr/tess4j/ImageDeskew.java |   10 +-
 .../apache/tika/parser/ocr/tess4j/ImageUtil.java   |   17 +-
 .../tika/parser/ocr/TesseractOCRConfigTest.java    |  149 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java    |  101 +-
 .../resources/test-configs/TIKA-2705-tesseract.xml |   26 +-
 .../tika-config-tesseract-arbitrary.xml            |   22 +-
 .../test-configs/tika-config-tesseract-full.xml    |   38 +-
 .../tika-config-tesseract-load-langs.xml           |   20 +-
 .../test-configs/tika-config-tesseract-partial.xml |   32 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  484 ++--
 .../org/apache/tika/parser/pdf/AccessChecker.java  |   18 +-
 .../tika/parser/pdf/ImageGraphicsEngine.java       |  290 ++-
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |   20 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |   97 +-
 .../tika/parser/pdf/PDFEncodedStringDecoder.java   |    6 +-
 .../tika/parser/pdf/PDFMarkedContent2XHTML.java    |  207 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  133 +-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  360 +--
 .../apache/tika/parser/pdf/PDFPreflightParser.java |   82 +-
 .../tika/parser/pdf/PDMetadataExtractor.java       |   54 +-
 .../org/apache/tika/parser/pdf/XFAExtractor.java   |   75 +-
 .../apache/tika/parser/pdf/AccessCheckerTest.java  |    6 +-
 .../parser/pdf/PDFMarkedContent2XHTMLTest.java     |   21 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  324 +--
 .../tika/parser/pdf/PDFPreflightParserTest.java    |   18 +-
 .../testPDF_deeplyEmbeddedAttachments.pdf          |  Bin 0 -> 122221 bytes
 .../apache/tika/parser/pkg/CompressorParser.java   |   54 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |  223 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |   30 +-
 .../apache/tika/parser/pkg/AbstractPkgTest.java    |   94 +-
 .../org/apache/tika/parser/pkg/ArParserTest.java   |   11 +-
 .../apache/tika/parser/pkg/Bzip2ParserTest.java    |   37 +-
 .../apache/tika/parser/pkg/CompressParserTest.java |   39 +-
 .../tika/parser/pkg/CompressorParserTest.java      |   21 +-
 .../org/apache/tika/parser/pkg/GzipParserTest.java |   29 +-
 .../apache/tika/parser/pkg/PackageParserTest.java  |   19 +-
 .../org/apache/tika/parser/pkg/RarParserTest.java  |   99 +-
 .../apache/tika/parser/pkg/Seven7ParserTest.java   |   69 +-
 .../org/apache/tika/parser/pkg/TarParserTest.java  |   67 +-
 .../org/apache/tika/parser/pkg/ZipParserTest.java  |   98 +-
 .../org/apache/tika/parser/pkg/ZlibParserTest.java |   33 +-
 .../src/test/resources/test-documents/testSVG.svg  |    8 +-
 .../java/org/apache/tika/parser/csv/CSVParams.java |    4 +-
 .../java/org/apache/tika/parser/csv/CSVResult.java |   17 +-
 .../org/apache/tika/parser/csv/CSVSniffer.java     |   84 +-
 .../apache/tika/parser/csv/TextAndCSVParser.java   |  169 +-
 .../tika/parser/strings/Latin1StringsParser.java   |  145 +-
 .../apache/tika/parser/strings/StringsConfig.java  |  163 +-
 .../tika/parser/strings/StringsEncoding.java       |   62 +-
 .../apache/tika/parser/strings/StringsParser.java  |  495 ++--
 .../apache/tika/parser/txt/CharsetDetector.java    |   46 +-
 .../org/apache/tika/parser/txt/CharsetMatch.java   |   10 +-
 .../apache/tika/parser/txt/CharsetRecog_2022.java  |   20 +-
 .../apache/tika/parser/txt/CharsetRecog_UTF8.java  |    8 +-
 .../tika/parser/txt/CharsetRecog_Unicode.java      |    2 +-
 .../apache/tika/parser/txt/CharsetRecog_mbcs.java  |  113 +-
 .../apache/tika/parser/txt/CharsetRecog_sbcs.java  | 1801 ++++++++------
 .../apache/tika/parser/txt/CharsetRecognizer.java  |    2 +-
 .../tika/parser/txt/Icu4jEncodingDetector.java     |   17 +-
 .../java/org/apache/tika/parser/txt/TXTParser.java |   26 +-
 .../tika/parser/txt/UniversalEncodingDetector.java |   16 +-
 .../tika/parser/txt/UniversalEncodingListener.java |   21 +-
 .../org/apache/tika/parser/csv/CSVSnifferTest.java |   51 +-
 .../tika/parser/csv/TextAndCSVParserTest.java      |  112 +-
 .../parser/strings/Latin1StringsParserTest.java    |   28 +-
 .../tika/parser/strings/StringsConfigTest.java     |  111 +-
 .../tika/parser/strings/StringsParserTest.java     |   89 +-
 .../tika/parser/txt/CharsetDetectorTest.java       |   21 +-
 .../org/apache/tika/parser/txt/TXTParserTest.java  |  126 +-
 .../test-configs/tika-config-strings-full.xml      |   18 +-
 .../test-configs/tika-config-strings-partial.xml   |   16 +-
 .../src/test/resources/test-documents/resume.html  |  140 +-
 .../tika/parser/xliff/XLIFF12ContentHandler.java   |   15 +-
 .../apache/tika/parser/xliff/XLIFF12Parser.java    |   27 +-
 .../org/apache/tika/parser/xliff/XLZParser.java    |   43 +-
 .../tika/parser/xml/AbstractMetadataHandler.java   |   46 +-
 .../xml/AttributeDependantMetadataHandler.java     |   34 +-
 .../tika/parser/xml/AttributeMetadataHandler.java  |   28 +-
 .../org/apache/tika/parser/xml/DcXMLParser.java    |   22 +-
 .../tika/parser/xml/ElementMetadataHandler.java    |   69 +-
 .../apache/tika/parser/xml/FictionBookParser.java  |   33 +-
 .../apache/tika/parser/xml/MetadataHandler.java    |   33 +-
 .../tika/parser/xml/TextAndAttributeXMLParser.java |    6 +-
 .../java/org/apache/tika/parser/xml/XMLParser.java |   39 +-
 .../org/apache/tika/parser/xml/XMLProfiler.java    |   99 +-
 .../tika/parser/xliff/XLIFF12ParserTest.java       |    5 +-
 .../apache/tika/parser/xliff/XLZParserTest.java    |   18 +-
 .../apache/tika/parser/xml/DcXMLParserTest.java    |   27 +-
 .../EmptyAndDuplicateElementsXMLParserTest.java    |   56 +-
 .../tika/parser/xml/FictionBookParserTest.java     |   10 +-
 .../parser/xml/TextAndAttributeXMLParserTest.java  |   21 +-
 .../src/test/resources/test-documents/testXML.xml  |   30 +-
 .../src/test/resources/test-documents/testXML2.xml |   10 +-
 .../src/test/resources/test-documents/testXML3.xml |   38 +-
 .../apache/tika/parser/xmp/JempboxExtractor.java   |   91 +-
 .../apache/tika/parser/xmp/XMPPacketScanner.java   |    4 +-
 .../tika/parser/xmp/JempboxExtractorTest.java      |   31 +-
 .../src/test/resources/test-documents/testXMP.xmp  |  342 ++-
 .../tika/detect/zip/CompressorConstants.java       |    3 +-
 .../detect/zip/DefaultZipContainerDetector.java    |  165 +-
 .../DeprecatedStreamingZipContainerDetector.java   |   37 +-
 .../detect/zip/DeprecatedZipContainerDetector.java |    3 -
 .../org/apache/tika/detect/zip/IPADetector.java    |   21 +-
 .../org/apache/tika/detect/zip/JarDetector.java    |   14 +-
 .../org/apache/tika/detect/zip/KMZDetector.java    |   22 +-
 .../tika/detect/zip/OpenDocumentDetector.java      |   20 +-
 .../apache/tika/detect/zip/PackageConstants.java   |    1 +
 .../apache/tika/detect/zip/StarOfficeDetector.java |   78 +-
 .../tika/detect/zip/StreamingDetectContext.java    |   16 +-
 .../detect/zip/StreamingZipContainerDetector.java  |   13 +-
 .../tika/detect/zip/ZipContainerDetector.java      |   16 +-
 .../tika/detect/zip/ZipContainerDetectorBase.java  |   47 +-
 .../org/apache/tika/zip/utils/ZipSalvager.java     |  104 +-
 .../org/apache/tika/detect/zip/ZipParserTest.java  |   14 +-
 .../org/apache/tika/parser/internal/Activator.java |   22 +-
 .../apache/tika/config/TikaDetectorConfigTest.java |   87 +-
 .../tika/config/TikaEncodingDetectorTest.java      |   82 +-
 .../apache/tika/config/TikaParserConfigTest.java   |   69 +-
 .../tika/config/TikaTranslatorConfigTest.java      |   21 +-
 .../tika/detect/TestContainerAwareDetector.java    |  278 ++-
 .../apache/tika/detect/TestDetectorLoading.java    |   15 +-
 .../tika/detect/TestFileCommandDetector.java       |   12 +-
 .../tika/extractor/EmbeddedDocumentUtilTest.java   |    3 +-
 .../java/org/apache/tika/mime/MimeTypeTest.java    |   12 +-
 .../java/org/apache/tika/mime/MimeTypesTest.java   |    4 +-
 .../java/org/apache/tika/mime/TestMimeTypes.java   |  733 +++---
 .../apache/tika/parser/AutoDetectParserTest.java   |  357 ++-
 .../tika/parser/AutoDetectReaderParserTest.java    |   24 +-
 .../parser/BouncyCastleDigestingParserTest.java    |  125 +-
 .../apache/tika/parser/DigestingParserTest.java    |  120 +-
 .../org/apache/tika/parser/ParsingReaderTest.java  |   13 +-
 .../tika/parser/RecursiveParserWrapperTest.java    |   61 +-
 .../org/apache/tika/parser/TabularFormatsTest.java |  252 +-
 .../java/org/apache/tika/parser/TestParsers.java   |   49 +-
 .../apache/tika/parser/TestXMLEntityExpansion.java |   90 +-
 .../java/org/apache/tika/parser/TestXXEInXML.java  |  115 +-
 .../java/org/apache/tika/parser/XMLTestBase.java   |   80 +-
 .../parser/apple/AppleSingleFileParserTest.java    |    8 +-
 .../apache/tika/parser/apple/PListParserTest.java  |   11 +-
 .../apache/tika/parser/crypto/TSDParserTest.java   |   19 +-
 .../parser/fork/ForkParserIntegrationTest.java     |  285 +--
 .../apache/tika/parser/html/HtmlParserTest.java    |   20 +-
 .../apache/tika/parser/mail/MboxParserTest.java    |   16 +-
 .../apache/tika/parser/mail/RFC822ParserTest.java  |   42 +-
 .../tika/parser/microsoft/EMFParserTest.java       |   17 +-
 .../tika/parser/microsoft/ExcelParserTest.java     |    7 +-
 .../microsoft/POIContainerExtractionTest.java      |    9 +-
 .../parser/microsoft/PowerPointParserTest.java     |   14 +-
 .../tika/parser/microsoft/XML2003ParserTest.java   |   25 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |   12 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |   17 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |   65 +-
 .../apache/tika/parser/mock/MockParserTest.java    |   70 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java    |   54 +-
 .../org/apache/tika/parser/odf/ODFParserTest.java  |   54 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  100 +-
 .../org/apache/tika/parser/pkg/ArParserTest.java   |   17 +-
 .../apache/tika/parser/pkg/Bzip2ParserTest.java    |   11 +-
 .../pkg/CompositeZipContainerDetectorTest.java     |  141 +-
 .../apache/tika/parser/pkg/CompressParserTest.java |   25 +-
 .../tika/parser/pkg/CompressorParserTest.java      |   17 +-
 .../org/apache/tika/parser/pkg/GzipParserTest.java |   13 +-
 .../org/apache/tika/parser/pkg/RarParserTest.java  |   16 +-
 .../apache/tika/parser/pkg/Seven7ParserTest.java   |   64 +-
 .../org/apache/tika/parser/pkg/TarParserTest.java  |   11 +-
 .../org/apache/tika/parser/pkg/ZipParserTest.java  |   65 +-
 .../org/apache/tika/parser/pkg/ZlibParserTest.java |   11 +-
 .../tika/parser/xml/FictionBookParserTest.java     |   14 +-
 .../sax/PhoneExtractingContentHandlerTest.java     |   20 +-
 .../sax/StandardsExtractingContentHandlerTest.java |   47 +-
 .../apache/tika/utils/ServiceLoaderUtilsTest.java  |    6 +-
 .../src/test/resources/log4j.properties            |    1 +
 .../test-documents/testJAVAPROPS.properties        |    1 +
 .../apache/tika/parser/envi/EnviHeaderParser.java  |  112 +-
 .../org/apache/tika/parser/gdal/GDALParser.java    |  249 +-
 .../geoinfo/GeographicInformationParser.java       |  451 ++--
 .../org/apache/tika/parser/grib/GribParser.java    |   41 +-
 .../java/org/apache/tika/parser/hdf/HDFParser.java |   32 +-
 .../org/apache/tika/parser/isatab/ISATabUtils.java |  311 +--
 .../apache/tika/parser/isatab/ISArchiveParser.java |  222 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java    |   28 +-
 .../tika/parser/envi/EnviHeaderParserTest.java     |   59 +-
 .../apache/tika/parser/gdal/TestGDALParser.java    |  112 +-
 .../geoinfo/GeographicInformationParserTest.java   |    5 +-
 .../apache/tika/parser/grib/GribParserTest.java    |   17 +-
 .../org/apache/tika/parser/hdf/HDFParserTest.java  |   48 +-
 .../tika/parser/isatab/ISArchiveParserTest.java    |   68 +-
 .../tika/parser/netcdf/NetCDFParserTest.java       |   23 +-
 .../ground-truth/EnviHeaderGroundTruth.txt         |    1 +
 .../tika/parser/sqlite3/SQLite3DBParser.java       |   19 +-
 .../apache/tika/parser/sqlite3/SQLite3Parser.java  |   17 +-
 .../tika/parser/sqlite3/SQLite3TableReader.java    |   19 +-
 .../tika/parser/sqlite3/SQLite3ParserTest.java     |   37 +-
 .../apache/tika/mime/TestMimeTypesExtended.java    |   23 +-
 .../tika/parser/sqlite3/SQLite3ParserTest.java     |   68 +-
 tika-server/pom.xml                                |   38 +-
 .../server/classic/config/PDFServerConfig.java     |   42 +-
 .../classic/config/TesseractServerConfig.java      |   41 +-
 .../classic/resource/XMPMetadataResource.java      |   34 +-
 .../classic/writer/XMPMessageBodyWriter.java       |   26 +-
 .../src/main/resources/log4j.properties            |    4 +-
 .../tika/server/classic/DetectorResourceTest.java  |   55 +-
 .../apache/tika/server/classic/FetcherTest.java    |   35 +-
 .../tika/server/classic/MetadataResourceTest.java  |   96 +-
 .../classic/RecursiveMetadataFilterTest.java       |   42 +-
 .../classic/RecursiveMetadataResourceTest.java     |  215 +-
 .../tika/server/classic/TikaDetectorsTest.java     |   41 +-
 .../tika/server/classic/TikaMimeTypesTest.java     |   39 +-
 .../tika/server/classic/TikaParsersTest.java       |   46 +-
 .../tika/server/classic/TikaResourceTest.java      |  418 ++--
 .../tika/server/classic/UnpackerResourceTest.java  |   91 +-
 .../test/resources/config/TIKA-3137-include.xml    |   38 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../test/resources/test-documents/testHTML.html    |   20 +-
 .../org/apache/tika/server/client/TikaClient.java  |   31 +-
 .../apache/tika/server/client/TikaClientCLI.java   |   35 +-
 .../tika/server/client/TikaEmitterResult.java      |   19 +-
 .../apache/tika/server/client/TikaHttpClient.java  |   73 +-
 .../src/main/resources/log4j.properties            |    6 +-
 .../org/apache/tika/server/client/TestBasic.java   |   15 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../server/core/CompositeParseContextConfig.java   |   10 +-
 .../server/core/DefaultInputStreamFactory.java     |    9 +-
 .../tika/server/core/FetcherStreamFactory.java     |   17 +-
 .../org/apache/tika/server/core/HTMLHelper.java    |    7 +-
 .../tika/server/core/InputStreamFactory.java       |    9 +-
 .../org/apache/tika/server/core/MetadataList.java  |    7 +-
 .../tika/server/core/ParseContextConfig.java       |   16 +-
 .../org/apache/tika/server/core/ServerStatus.java  |  122 +-
 .../tika/server/core/ServerStatusWatcher.java      |   57 +-
 .../apache/tika/server/core/TikaLoggingFilter.java |    7 +-
 .../org/apache/tika/server/core/TikaServerCli.java |   39 +-
 .../apache/tika/server/core/TikaServerConfig.java  |  543 ++---
 .../tika/server/core/TikaServerParseException.java |    3 +-
 .../core/TikaServerParseExceptionMapper.java       |   12 +-
 .../apache/tika/server/core/TikaServerProcess.java |  194 +-
 .../tika/server/core/TikaServerWatchDog.java       |  222 +-
 .../apache/tika/server/core/WatchDogResult.java    |    7 +-
 .../server/core/config/DocumentSelectorConfig.java |   10 +-
 .../server/core/config/PasswordProviderConfig.java |   27 +-
 .../tika/server/core/resource/AsyncEmitter.java    |   30 +-
 .../tika/server/core/resource/AsyncParser.java     |   42 +-
 .../tika/server/core/resource/AsyncRequest.java    |    4 +-
 .../tika/server/core/resource/AsyncResource.java   |   56 +-
 .../server/core/resource/DetectorResource.java     |   21 +-
 .../tika/server/core/resource/EmitterResource.java |  122 +-
 .../server/core/resource/LanguageResource.java     |   55 +-
 .../server/core/resource/MetadataResource.java     |   62 +-
 .../core/resource/RecursiveMetadataResource.java   |  131 +-
 .../tika/server/core/resource/TikaDetectors.java   |   15 +-
 .../tika/server/core/resource/TikaMimeTypes.java   |   37 +-
 .../tika/server/core/resource/TikaParsers.java     |   43 +-
 .../tika/server/core/resource/TikaResource.java    |  243 +-
 .../server/core/resource/TikaServerStatus.java     |    8 +-
 .../tika/server/core/resource/TikaWelcome.java     |   44 +-
 .../server/core/resource/TranslateResource.java    |  135 +-
 .../server/core/resource/UnpackerResource.java     |   84 +-
 .../server/core/writer/CSVMessageBodyWriter.java   |   29 +-
 .../server/core/writer/JSONMessageBodyWriter.java  |   29 +-
 .../tika/server/core/writer/JSONObjWriter.java     |   30 +-
 .../core/writer/MetadataListMessageBodyWriter.java |   29 +-
 .../apache/tika/server/core/writer/TarWriter.java  |   25 +-
 .../server/core/writer/TextMessageBodyWriter.java  |   28 +-
 .../apache/tika/server/core/writer/ZipWriter.java  |   27 +-
 .../src/main/resources/tikaserver-template.html    |   18 +-
 .../main/resources/tikaserver-version.properties   |   15 +
 .../org/apache/tika/server/core/CXFTestBase.java   |   84 +-
 .../tika/server/core/IntegrationTestBase.java      |   92 +-
 .../tika/server/core/LanguageResourceTest.java     |  139 +-
 .../tika/server/core/NullWebClientLogger.java      |    5 +-
 .../apache/tika/server/core/ServerStatusTest.java  |   16 +-
 .../apache/tika/server/core/StackTraceOffTest.java |   54 +-
 .../apache/tika/server/core/StackTraceTest.java    |   62 +-
 .../apache/tika/server/core/TikaEmitterTest.java   |  162 +-
 .../apache/tika/server/core/TikaMimeTypesTest.java |   34 +-
 .../apache/tika/server/core/TikaResourceTest.java  |   60 +-
 .../core/TikaServerAsyncIntegrationTest.java       |  123 +-
 .../tika/server/core/TikaServerConfigTest.java     |   32 +-
 .../core/TikaServerEmitterIntegrationTest.java     |  147 +-
 .../server/core/TikaServerIntegrationTest.java     |  217 +-
 .../tika/server/core/TikaServerStatusTest.java     |   29 +-
 .../apache/tika/server/core/TikaVersionTest.java   |   20 +-
 .../apache/tika/server/core/TikaWelcomeTest.java   |   54 +-
 .../tika/server/core/TranslateResourceTest.java    |   90 +-
 .../src/test/resources/log4j.properties            |    6 +-
 .../test-documents/mock/heavy_hang_100.xml         |    2 +-
 .../test-documents/mock/heavy_hang_30000.xml       |    2 +-
 .../resources/test-documents/mock/system_exit.xml  |    2 +-
 .../test-documents/mock/testStaticStdOutErr.xml    |   45 +-
 .../test-documents/mock/testStdOutErr.xml          |   45 +-
 .../test-documents/mock/thread_interrupt.xml       |    2 +-
 1005 files changed, 38487 insertions(+), 37841 deletions(-)

diff --cc tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java
index 39643b2,db6dd4c..dda0054
--- a/tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java
@@@ -46,22 -59,9 +59,9 @@@ public abstract class AbstractEmitter i
       * @throws TikaEmitterException
       */
      @Override
 -    public void emit(List<EmitData> emitData) throws IOException, TikaEmitterException {
 +    public void emit(List<? extends EmitData> emitData) throws IOException, TikaEmitterException {
          for (EmitData d : emitData) {
 -            emit(d.getEmitKey().getKey(), d.getMetadataList());
 +            emit(d.getEmitKey().getEmitKey(), d.getMetadataList());
          }
      }
- 
-     public static long estimateSizeInBytes(String id, List<Metadata> metadataList) {
-         long sz = 36 + id.length() * 2;
-         for (Metadata m : metadataList) {
-             for (String n : m.names()) {
-                 sz += 36 + n.length() * 2;
-                 for (String v : m.getValues(n)) {
-                     sz += 36 + v.length() * 2;
-                 }
-             }
-         }
-         return sz;
-     }
  }
diff --cc tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
index f6e7c74,1f7d5b9..974aa3b
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
@@@ -22,16 -22,11 +22,11 @@@ import org.apache.tika.pipes.fetcher.Fe
  
  public class FetchEmitTuple {
  
-     public enum ON_PARSE_EXCEPTION {
-         SKIP,
-         EMIT
-     }
      public static final ON_PARSE_EXCEPTION DEFAULT_ON_PARSE_EXCEPTION = ON_PARSE_EXCEPTION.EMIT;
      private final FetchKey fetchKey;
 -    private final EmitKey emitKey;
 +    private EmitKey emitKey;
      private final Metadata metadata;
      private final ON_PARSE_EXCEPTION onParseException;
- 
      public FetchEmitTuple(FetchKey fetchKey, EmitKey emitKey, Metadata metadata) {
          this(fetchKey, emitKey, metadata, DEFAULT_ON_PARSE_EXCEPTION);
      }
@@@ -59,17 -55,10 +55,13 @@@
          return onParseException;
      }
  
 +    public void setEmitKey(EmitKey emitKey) {
 +        this.emitKey = emitKey;
 +    }
      @Override
      public String toString() {
-         return "FetchEmitTuple{" +
-                 "fetchKey=" + fetchKey +
-                 ", emitKey=" + emitKey +
-                 ", metadata=" + metadata +
-                 ", onParseException=" + onParseException +
-                 '}';
+         return "FetchEmitTuple{" + "fetchKey=" + fetchKey + ", emitKey=" + emitKey + ", metadata=" +
+                 metadata + ", onParseException=" + onParseException + '}';
      }
  
      @Override
diff --cc tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index a44f0dc,c75b430..187cc3e
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@@ -185,11 -182,9 +181,11 @@@ public class TesseractOCRParserTest ext
          assertContainsCount("<body", xml, 1);
          assertContainsCount("</body", xml, 1);
          assertContainsCount("</html", xml, 1);
 +
 +        assertNotContained("<meta name=\"Content-Type\" content=\"image/ocr-jpeg\" />", xml);
      }
  
-     
+ 
      @Test
      public void getNormalMetadataToo() throws Exception {
          //this should be successful whether or not TesseractOCR is installed/active
diff --cc tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
index 047fa13,96c2e30..56a71db
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
@@@ -57,14 -63,17 +63,23 @@@ public class EmitterResource 
      private static final String FETCHER_NAME_ABBREV = "fn";
      private static final String FETCH_KEY_ABBREV = "fk";
      private static final String EMIT_KEY_ABBREV = "ek";
 +
 +    /**
 +     * key that is safe to pass through http header.
 +     * The user _must_ specify this for the fsemitter if calling 'put'
 +     */
 +    public static final String EMIT_KEY_FOR_HTTP_HEADER = "emit-key";
      private static final Logger LOG = LoggerFactory.getLogger(EmitterResource.class);
  
+     static EmitKey calcEmitKey(FetchEmitTuple t) {
+         //use fetch key if emitter key is not specified
+         //TODO: clean this up?
+         EmitKey emitKey = t.getEmitKey();
+         if (StringUtils.isBlank(emitKey.getKey())) {
+             emitKey = new EmitKey(emitKey.getEmitterName(), t.getFetchKey().getKey());
+         }
+         return emitKey;
+     }
  
      /**
       * @param is          input stream is ignored in 'get'
@@@ -188,24 -188,14 +197,24 @@@
          return emit(calcEmitKey(t), metadataList);
      }
  
 +    static EmitKey calcEmitKey(FetchEmitTuple t) {
 +        //use fetch key if emitter key is not specified
 +        //TODO: clean this up?
 +        EmitKey emitKey = t.getEmitKey();
-         if (StringUtils.isBlank(emitKey.getEmitKey())) {
++        if (StringUtils.isBlank(emitKey.getKey())) {
 +            emitKey = new EmitKey(emitKey.getEmitterName(), t.getFetchKey().getKey());
 +        }
 +        return emitKey;
 +    }
 +
      private Map<String, String> skip(FetchEmitTuple t, List<Metadata> metadataList) {
-             Map<String, String> statusMap = new HashMap<>();
-             statusMap.put("status", "ok");
-             statusMap.put("emitter", t.getEmitKey().getEmitterName());
-             statusMap.put("emitKey", t.getEmitKey().getEmitKey());
-             String msg = metadataList.get(0).get(TikaCoreProperties.CONTAINER_EXCEPTION);
-             statusMap.put("parse_exception", msg);
-             return statusMap;
+         Map<String, String> statusMap = new HashMap<>();
+         statusMap.put("status", "ok");
+         statusMap.put("emitter", t.getEmitKey().getEmitterName());
+         statusMap.put("emitKey", t.getEmitKey().getKey());
+         String msg = metadataList.get(0).get(TikaCoreProperties.CONTAINER_EXCEPTION);
+         statusMap.put("parse_exception", msg);
+         return statusMap;
      }
  
      private boolean checkParseException(FetchEmitTuple t, List<Metadata> metadataList) {
diff --cc tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
index 9594de4,fda802f..a99cea1
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
@@@ -261,13 -238,11 +238,14 @@@ public class TikaServerEmitterIntegrati
          return testOne(fileName, shouldFileExist, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
      }
  
-     private JsonNode testOne(String fileName, boolean shouldFileExist, FetchEmitTuple.ON_PARSE_EXCEPTION onParseException) throws Exception {
+     private JsonNode testOne(String fileName, boolean shouldFileExist,
+                              FetchEmitTuple.ON_PARSE_EXCEPTION onParseException) throws Exception {
  
          awaitServerStartup();
 -        Response response = WebClient.create(endPoint + "/emit").accept("application/json")
 +        System.out.println(getJsonString(fileName, onParseException));
 +        Response response = WebClient
 +                .create(endPoint + "/emit")
 +                .accept("application/json")
                  .post(getJsonString(fileName, onParseException));
          if (response.getStatus() == 200) {
              Path targFile = TMP_OUTPUT_DIR.resolve(fileName + ".json");