You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by le...@apache.org on 2016/06/30 19:25:52 UTC

[2/2] tika git commit: Merge branch '2.x' of https://git-wip-us.apache.org/repos/asf/tika into 2.x

Merge branch '2.x' of https://git-wip-us.apache.org/repos/asf/tika into 2.x


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/573527bb
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/573527bb
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/573527bb

Branch: refs/heads/2.x
Commit: 573527bbc608d495c40f26c02c7286197c3c723b
Parents: bd3ecfc 2a7e52e
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Jun 30 12:32:01 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Jun 30 12:32:01 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   36 +-
 tika-app/pom.xml                                |    8 +
 .../org/apache/tika/mime/TestMimeTypes.java     |   33 +-
 .../tika/parser/AutoDetectParserTest.java       |   24 +-
 .../ConfigurableThreadPoolExecutor.java         |   64 +-
 .../concurrent/SimpleThreadPoolExecutor.java    |   80 +-
 .../apache/tika/detect/AbstractDetector.java    |   86 +-
 .../org/apache/tika/detect/DetectorProxy.java   |  134 +-
 .../tika/detect/EncodingDetectorProxy.java      |   82 +-
 .../java/org/apache/tika/io/EndianUtils.java    |  830 ++---
 .../java/org/apache/tika/io/StringUtil.java     |  242 +-
 .../tika/metadata/TikaCoreProperties.java       |    7 +
 .../tika/osgi/TikaAbstractBundleActivator.java  |  142 +-
 .../java/org/apache/tika/osgi/TikaService.java  |   50 +-
 .../tika/osgi/internal/TikaServiceImpl.java     |  162 +-
 .../org/apache/tika/parser/AbstractParser.java  |   24 -
 .../org/apache/tika/parser/ParserProxy.java     |  148 +-
 .../org/apache/tika/utils/ConcurrentUtils.java  |  114 +-
 .../org/apache/tika/mime/tika-mimetypes.xml     |   54 +-
 .../java/org/apache/tika/TikaDetectionTest.java |    2 +-
 .../src/test/java/org/apache/tika/TikaTest.java |    6 +-
 .../org/apache/tika/config/DummyExecutor.java   |   60 +-
 .../apache/tika/detect/DetectorProxyTest.java   |  112 +-
 .../apache/tika/detect/DummyProxyDetector.java  |   62 +-
 .../org/apache/tika/io/EndianUtilsTest.java     |   35 +
 .../apache/tika/parser/DummyProxyParser.java    |   88 +-
 .../org/apache/tika/parser/ParserProxyTest.java |  130 +-
 .../apache/tika/utils/ConcurrentUtilsTest.java  |  126 +-
 .../services/org.apache.tika.parser.Parser      |   34 +-
 .../apache/tika/config/TIKA-1762-executors.xml  |   56 +-
 tika-parent/pom.xml                             |    9 +
 tika-parser-bundles/pom.xml                     |  350 +--
 .../tika-parser-advanced-bundle/pom.xml         |  162 +-
 .../tika-parser-cad-bundle/pom.xml              |  144 +-
 .../tika-parser-code-bundle/pom.xml             |  148 +-
 .../tika-parser-crypto-bundle/pom.xml           |  156 +-
 .../tika-parser-database-bundle/pom.xml         |  134 +-
 .../tika-parser-ebook-bundle/pom.xml            |  142 +-
 .../tika-parser-journal-bundle/pom.xml          |  158 +-
 .../apache/tika/module/journal/BundleIT.java    |    2 +-
 .../tika-parser-multimedia-bundle/pom.xml       |  168 +-
 .../tika-parser-office-bundle/pom.xml           |  280 +-
 .../org/apache/tika/module/office/BundleIT.java |   24 +-
 .../tika-parser-package-bundle/pom.xml          |  158 +-
 .../tika-parser-pdf-bundle/pom.xml              |  197 +-
 .../org/apache/tika/module/pdf/BundleIT.java    |    2 +-
 .../tika-parser-scientific-bundle/pom.xml       |  402 +--
 .../tika-parser-text-bundle/pom.xml             |  156 +-
 .../tika-parser-web-bundle/pom.xml              |  184 +-
 tika-parser-modules/pom.xml                     |  410 +--
 .../tika-parser-advanced-module/pom.xml         |  136 +-
 .../module/advanced/internal/Activator.java     |   72 +-
 .../tika-parser-cad-module/pom.xml              |  110 +-
 .../tika/module/cad/internal/Activator.java     |   72 +-
 .../org/apache/tika/parser/dwg/DWGParser.java   |  712 ++---
 .../org/apache/tika/parser/prt/PRTParser.java   |  555 ++--
 .../apache/tika/parser/dwg/DWGParserTest.java   |  372 ++-
 .../apache/tika/parser/prt/PRTParserTest.java   |  214 +-
 .../tika-parser-code-module/pom.xml             |  136 +-
 .../tika/module/code/internal/Activator.java    |   72 +-
 .../org/apache/tika/parser/asm/ClassParser.java |  108 +-
 .../tika/parser/asm/XHTMLClassVisitor.java      |  646 ++--
 .../tika/parser/code/SourceCodeParser.java      |  284 +-
 .../apache/tika/parser/asm/ClassParserTest.java |  118 +-
 .../tika/parser/code/SourceCodeParserTest.java  |  202 +-
 .../tika-parser-crypto-module/pom.xml           |  104 +-
 .../tika/module/crypto/internal/Activator.java  |   72 +-
 .../tika/parser/crypto/Pkcs7ParserTest.java     |   94 +-
 .../tika-parser-database-module/pom.xml         |  132 +-
 .../module/database/internal/Activator.java     |   72 +-
 .../tika-parser-ebook-module/pom.xml            |   94 +-
 .../tika/module/ebook/internal/Activator.java   |   72 +-
 .../tika/parser/epub/EpubContentParser.java     |  118 +-
 .../org/apache/tika/parser/epub/EpubParser.java |  238 +-
 .../apache/tika/parser/epub/EpubParserTest.java |  116 +-
 .../tika-parser-journal-module/pom.xml          |  134 +-
 .../tika/module/journal/internal/Activator.java |   72 +-
 .../tika-parser-multimedia-module/pom.xml       |  206 +-
 .../module/multimedia/internal/Activator.java   |   72 +-
 .../apache/tika/parser/audio/AudioParser.java   |  278 +-
 .../apache/tika/parser/audio/MidiParser.java    |  242 +-
 .../apache/tika/parser/font/TrueTypeParser.java |  222 +-
 .../parser/image/ImageMetadataExtractor.java    | 1124 +++----
 .../apache/tika/parser/image/ImageParser.java   |  406 +--
 .../tika/parser/image/MetadataFields.java       |  168 +-
 .../apache/tika/parser/image/TiffParser.java    |  136 +-
 .../org/apache/tika/parser/jpeg/JpegParser.java |  138 +-
 .../org/apache/tika/parser/mp3/AudioFrame.java  |  504 ++--
 .../tika/parser/mp3/CompositeTagHandler.java    |  284 +-
 .../org/apache/tika/parser/mp3/ID3Tags.java     |  508 ++--
 .../apache/tika/parser/mp3/ID3v1Handler.java    |  366 +--
 .../apache/tika/parser/mp3/ID3v22Handler.java   |  318 +-
 .../apache/tika/parser/mp3/ID3v23Handler.java   |  276 +-
 .../apache/tika/parser/mp3/ID3v24Handler.java   |  286 +-
 .../org/apache/tika/parser/mp3/ID3v2Frame.java  |  848 +++---
 .../apache/tika/parser/mp3/LyricsHandler.java   |  312 +-
 .../org/apache/tika/parser/mp3/MP3Frame.java    |   50 +-
 .../org/apache/tika/parser/mp3/Mp3Parser.java   |  492 +--
 .../tika/parser/ocr/TesseractOCRParser.java     |   93 +-
 .../org/apache/tika/parser/video/FLVParser.java |  536 ++--
 .../parser/ocr/TesseractOCRConfig.properties    |   40 +-
 .../tika/parser/audio/AudioParserTest.java      |  150 +-
 .../tika/parser/audio/MidiParserTest.java       |   84 +-
 .../image/ImageMetadataExtractorTest.java       |  278 +-
 .../tika/parser/image/ImageParserTest.java      |  324 +-
 .../tika/parser/image/MetadataFieldsTest.java   |   72 +-
 .../tika/parser/image/TiffParserTest.java       |  132 +-
 .../apache/tika/parser/jpeg/JpegParserTest.java |  568 ++--
 .../apache/tika/parser/mp3/Mp3ParserTest.java   |  828 ++---
 .../tika/parser/ocr/TesseractOCRConfigTest.java |  184 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java |  527 ++--
 .../apache/tika/parser/video/FLVParserTest.java |   88 +-
 .../tika-parser-office-module/pom.xml           |  250 +-
 .../tika/module/office/internal/Activator.java  |   72 +-
 .../parser/apple/AppleSingleFileParser.java     |  204 ++
 .../org/apache/tika/parser/chm/ChmParser.java   |  224 +-
 .../tika/parser/chm/accessor/ChmAccessor.java   |   78 +-
 .../chm/accessor/ChmDirectoryListingSet.java    |  796 ++---
 .../tika/parser/chm/accessor/ChmItsfHeader.java |  984 +++---
 .../tika/parser/chm/accessor/ChmItspHeader.java | 1096 +++----
 .../parser/chm/accessor/ChmLzxcControlData.java |  638 ++--
 .../parser/chm/accessor/ChmLzxcResetTable.java  |  682 ++---
 .../tika/parser/chm/accessor/ChmPmgiHeader.java |  352 +--
 .../tika/parser/chm/accessor/ChmPmglHeader.java |  412 +--
 .../chm/accessor/DirectoryListingEntry.java     |  302 +-
 .../tika/parser/chm/assertion/ChmAssert.java    |  338 +--
 .../apache/tika/parser/chm/core/ChmCommons.java |  722 ++---
 .../tika/parser/chm/core/ChmConstants.java      |  204 +-
 .../tika/parser/chm/core/ChmExtractor.java      |  784 ++---
 .../apache/tika/parser/chm/core/ChmWrapper.java |  294 +-
 .../chm/exception/ChmParsingException.java      |   54 +-
 .../tika/parser/chm/lzx/ChmBlockInfo.java       |  470 +--
 .../apache/tika/parser/chm/lzx/ChmLzxBlock.java | 1826 +++++------
 .../apache/tika/parser/chm/lzx/ChmLzxState.java |  654 ++--
 .../apache/tika/parser/chm/lzx/ChmSection.java  |  444 +--
 .../org/apache/tika/parser/mbox/MboxParser.java |  418 +--
 .../tika/parser/mbox/OutlookPSTParser.java      |  406 +--
 .../microsoft/AbstractPOIFSExtractor.java       |   32 +-
 .../tika/parser/microsoft/HSLFExtractor.java    |   18 +-
 .../parser/microsoft/JackcessExtractor.java     |    4 +-
 .../parser/microsoft/MSOwnerFileParser.java     |   80 +
 .../tika/parser/microsoft/OfficeParser.java     |    2 +-
 .../tika/parser/microsoft/WordExtractor.java    |   22 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java |   12 +-
 .../microsoft/xml/AbstractXML2003Parser.java    |   93 +-
 .../microsoft/xml/SpreadsheetMLParser.java      |   42 +-
 .../tika/parser/microsoft/xml/WordMLParser.java |  121 +-
 .../parser/odf/NSNormalizerContentHandler.java  |  198 +-
 .../parser/odf/OpenDocumentContentParser.java   |  992 +++---
 .../tika/parser/odf/OpenDocumentMetaParser.java |  398 +--
 .../tika/parser/odf/OpenDocumentParser.java     |  450 +--
 .../org/apache/tika/parser/opc/OPCDetector.java |  310 +-
 .../parser/opendocument/OpenOfficeParser.java   |   56 +-
 .../org/apache/tika/parser/rtf/GroupState.java  |  134 +-
 .../apache/tika/parser/rtf/ListDescriptor.java  |   70 +-
 .../tika/parser/rtf/RTFEmbObjHandler.java       |    7 +-
 .../tika/parser/rtf/RTFObjDataParser.java       |   43 +-
 .../org/apache/tika/parser/rtf/RTFParser.java   |  186 +-
 .../apache/tika/parser/rtf/TextExtractor.java   | 2853 +++++++++---------
 .../services/org.apache.tika.parser.Parser      |    3 +-
 .../parser/apple/AppleSingleFileParserTest.java |   43 +
 .../tika/parser/chm/TestChmBlockInfo.java       |  250 +-
 .../tika/parser/chm/TestChmExtraction.java      |  424 +--
 .../tika/parser/chm/TestChmExtractor.java       |  126 +-
 .../tika/parser/chm/TestChmItsfHeader.java      |  244 +-
 .../tika/parser/chm/TestChmItspHeader.java      |  320 +-
 .../apache/tika/parser/chm/TestChmLzxState.java |  202 +-
 .../tika/parser/chm/TestChmLzxcControlData.java |  288 +-
 .../tika/parser/chm/TestChmLzxcResetTable.java  |  312 +-
 .../parser/chm/TestDirectoryListingEntry.java   |  170 +-
 .../apache/tika/parser/chm/TestParameters.java  |  208 +-
 .../apache/tika/parser/chm/TestPmgiHeader.java  |   90 +-
 .../apache/tika/parser/chm/TestPmglHeader.java  |  152 +-
 .../apache/tika/parser/dbf/DBFParserTest.java   |    2 +
 .../apache/tika/parser/mbox/MboxParserTest.java |  312 +-
 .../tika/parser/mbox/OutlookPSTParserTest.java  |  220 +-
 .../AbstractPOIContainerExtractionTest.java     |  150 +-
 .../tika/parser/microsoft/ExcelParserTest.java  |  817 ++---
 .../parser/microsoft/MSOwnerFileParserTest.java |   31 +
 .../tika/parser/microsoft/OfficeParserTest.java |   92 +-
 .../parser/microsoft/OutlookParserTest.java     |  478 +--
 .../microsoft/POIContainerExtractionTest.java   |  764 ++---
 .../parser/microsoft/PowerPointParserTest.java  |  492 +--
 .../parser/microsoft/PublisherParserTest.java   |  106 +-
 .../tika/parser/microsoft/TNEFParserTest.java   |  196 +-
 .../tika/parser/microsoft/VisioParserTest.java  |  102 +-
 .../tika/parser/microsoft/WordParserTest.java   | 1011 ++++---
 .../ooxml/OOXMLContainerExtractionTest.java     |    2 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java |   27 +
 .../parser/microsoft/xml/XML2003ParserTest.java |   40 +-
 .../apache/tika/parser/odf/ODFParserTest.java   |  680 ++---
 .../apache/tika/parser/rtf/RTFParserTest.java   | 1050 ++++---
 .../tika-parser-package-module/pom.xml          |  150 +-
 .../tika/module/pkg/internal/Activator.java     |   72 +-
 .../tika/parser/iwork/AutoPageNumberUtils.java  |  224 +-
 .../tika/parser/iwork/IWorkPackageParser.java   |  438 +--
 .../parser/iwork/KeynoteContentHandler.java     |  348 +--
 .../parser/iwork/NumbersContentHandler.java     |  462 +--
 .../tika/parser/iwork/PagesContentHandler.java  |  896 +++---
 .../apache/tika/parser/pkg/PackageParser.java   |  574 ++--
 .../tika/parser/pkg/ZipContainerDetector.java   |  648 ++--
 .../parser/iwork/AutoPageNumberUtilsTest.java   |  156 +-
 .../tika/parser/iwork/IWorkParserTest.java      |  932 +++---
 .../apache/tika/parser/pkg/AbstractPkgTest.java |  186 +-
 .../apache/tika/parser/pkg/Bzip2ParserTest.java |  178 +-
 .../apache/tika/parser/pkg/GzipParserTest.java  |  204 +-
 .../apache/tika/parser/pkg/TarParserTest.java   |  210 +-
 .../apache/tika/parser/pkg/ZipParserTest.java   |  384 +--
 .../tika-parser-pdf-module/pom.xml              |  246 +-
 .../tika/module/pdf/internal/Activator.java     |   72 +-
 .../tika/parser/pdf/AbstractPDF2XHTML.java      |  579 ++++
 .../org/apache/tika/parser/pdf/OCR2XHTML.java   |  125 +
 .../org/apache/tika/parser/pdf/PDF2XHTML.java   |  518 +---
 .../org/apache/tika/parser/pdf/PDFParser.java   |    8 +
 .../apache/tika/parser/pdf/PDFParserConfig.java |  274 +-
 .../apache/tika/parser/pdf/PDFParser.properties |   10 +-
 .../apache/tika/parser/pdf/PDFParserTest.java   |   49 +-
 .../tika-parser-scientific-module/pom.xml       |  270 +-
 .../module/scientific/internal/Activator.java   |   72 +-
 .../org/apache/tika/parser/hdf/HDFParser.java   |  244 +-
 .../apache/tika/parser/hdf/HDFParserTest.java   |  144 +-
 .../tika/parser/netcdf/NetCDFParserTest.java    |  122 +-
 .../tika-parser-text-module/pom.xml             |  132 +-
 .../tika/module/text/internal/Activator.java    |   40 +-
 .../apache/tika/parser/txt/CharsetDetector.java | 1088 +++----
 .../apache/tika/parser/txt/CharsetMatch.java    |  572 ++--
 .../tika/parser/txt/CharsetRecog_2022.java      |  326 +-
 .../tika/parser/txt/CharsetRecog_UTF8.java      |  198 +-
 .../tika/parser/txt/CharsetRecog_Unicode.java   |  278 +-
 .../tika/parser/txt/CharsetRecog_mbcs.java      | 1064 +++----
 .../tika/parser/txt/CharsetRecog_sbcs.java      | 2706 ++++++++---------
 .../tika/parser/txt/CharsetRecognizer.java      |  108 +-
 .../org/apache/tika/parser/txt/TXTParser.java   |  196 +-
 .../parser/xml/AbstractMetadataHandler.java     |  186 +-
 .../xml/AttributeDependantMetadataHandler.java  |  164 +-
 .../parser/xml/AttributeMetadataHandler.java    |  122 +-
 .../org/apache/tika/parser/xml/DcXMLParser.java |  120 +-
 .../tika/parser/xml/ElementMetadataHandler.java |  510 ++--
 .../tika/parser/xml/FictionBookParser.java      |  234 +-
 .../apache/tika/parser/xml/MetadataHandler.java |  170 +-
 .../org/apache/tika/parser/xml/XMLParser.java   |  178 +-
 .../apache/tika/parser/txt/TXTParserTest.java   |  548 ++--
 .../apache/tika/parser/xml/DcXMLParserTest.java |  174 +-
 .../EmptyAndDuplicateElementsXMLParserTest.java |  232 +-
 .../tika/parser/xml/FictionBookParserTest.java  |  108 +-
 .../tika-parser-web-module/pom.xml              |  178 +-
 .../tika/module/web/internal/Activator.java     |   72 +-
 .../org/apache/tika/parser/feed/FeedParser.java |  254 +-
 .../parser/html/BoilerpipeContentHandler.java   |  694 ++---
 .../tika/parser/html/DefaultHtmlMapper.java     |  274 +-
 .../apache/tika/parser/html/HtmlHandler.java    |  618 ++--
 .../org/apache/tika/parser/html/HtmlMapper.java |  138 +-
 .../org/apache/tika/parser/html/HtmlParser.java |  388 +--
 .../tika/parser/html/IdentityHtmlMapper.java    |   86 +-
 .../tika/parser/html/XHTMLDowngradeHandler.java |  156 +-
 .../tika/parser/mail/MailContentHandler.java    |  752 ++---
 .../apache/tika/parser/mail/RFC822Parser.java   |  190 +-
 .../apache/tika/parser/feed/FeedParserTest.java |  150 +-
 .../apache/tika/parser/html/HtmlParserTest.java | 2262 +++++++-------
 .../tika/parser/mail/RFC822ParserTest.java      |  970 +++---
 .../tika/parser/xmp/JempboxExtractor.java       |   30 +
 .../tika/parser/xmp/JempboxExtractorTest.java   |   29 +-
 .../test-documents/testAppleSingleFile.pdf      |  Bin 0 -> 1893 bytes
 .../test/resources/test-documents/testDJVU.djvu |  Bin 0 -> 89 bytes
 .../test-documents/testEXCEL_embeddedPDF.xls    |  Bin 0 -> 38400 bytes
 .../test-documents/testEXCEL_embeddedPDF.xlsx   |  Bin 0 -> 25602 bytes
 .../test-documents/testEndNoteImportFile.enw    |   10 +
 .../resources/test-documents/testICalendar.ics  |   15 +
 .../resources/test-documents/testMSOwnerFile    |  Bin 0 -> 162 bytes
 .../test-documents/testPPT_embeddedPDF.ppt      |  Bin 0 -> 187392 bytes
 .../test-documents/testPPT_embeddedPDF.pptx     |  Bin 0 -> 108637 bytes
 .../resources/test-documents/testVCalendar.vcs  |   10 +
 .../test-documents/testWindowsMediaMeta.asx     |    6 +
 .../test/resources/test-documents/testXMP.xmp   |  178 ++
 .../test-documents/test_recursive_embedded.doc  |  Bin 0 -> 31744 bytes
 275 files changed, 39074 insertions(+), 37550 deletions(-)
----------------------------------------------------------------------