You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by le...@apache.org on 2016/10/26 02:37:08 UTC

[3/7] tika git commit: Merge master into TIKA-1343

Merge master into TIKA-1343


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/fe559b80
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/fe559b80
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/fe559b80

Branch: refs/heads/master
Commit: fe559b80bcad1f107904ca7a89724a26ea2921a1
Parents: 4aff483 23a11ef
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Jul 1 13:35:52 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Jul 1 13:35:52 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   38 +-
 pom.xml                                         |    2 +-
 tika-app/pom.xml                                |    2 +-
 .../main/java/org/apache/tika/cli/TikaCLI.java  |    9 +-
 tika-batch/pom.xml                              |    2 +-
 tika-bundle/pom.xml                             |    2 +-
 tika-core/pom.xml                               |    2 +-
 .../org/apache/tika/detect/NameDetector.java    |   15 +-
 .../tika/detect/ZeroSizeFileDetector.java       |   45 +
 .../java/org/apache/tika/io/EndianUtils.java    |  829 +++---
 .../tika/metadata/TikaCoreProperties.java       |    7 +
 .../java/org/apache/tika/mime/MediaType.java    |    3 +
 .../org/apache/tika/mime/MediaTypeRegistry.java |    2 +
 .../org/apache/tika/mime/tika-mimetypes.xml     |   69 +-
 .../java/org/apache/tika/TikaDetectionTest.java |    2 +-
 .../src/test/java/org/apache/tika/TikaTest.java |    6 +-
 .../apache/tika/detect/NameDetectorTest.java    |   10 +
 .../tika/detect/ZeroSizeFileDetectorTest.java   |   64 +
 .../org/apache/tika/io/EndianUtilsTest.java     |   35 +
 tika-example/pom.xml                            |    2 +-
 tika-java7/pom.xml                              |    2 +-
 tika-langdetect/pom.xml                         |    3 +-
 ...apache.tika.language.detect.LanguageDetector |   15 +
 tika-parent/pom.xml                             |    4 +-
 tika-parsers/pom.xml                            |    4 +-
 .../parser/apple/AppleSingleFileParser.java     |  205 ++
 .../org/apache/tika/parser/dbf/DBFCell.java     |  147 +
 .../apache/tika/parser/dbf/DBFColumnHeader.java |   97 +
 .../apache/tika/parser/dbf/DBFFileHeader.java   |  144 +
 .../org/apache/tika/parser/dbf/DBFParser.java   |  155 ++
 .../org/apache/tika/parser/dbf/DBFReader.java   |  207 ++
 .../java/org/apache/tika/parser/dbf/DBFRow.java |   62 +
 .../apache/tika/parser/geo/topic/GeoParser.java |   14 +-
 .../tika/parser/html/HtmlEncodingDetector.java  |   16 +-
 .../apache/tika/parser/html/HtmlHandler.java    |    3 +
 .../tika/parser/image/xmp/JempboxExtractor.java |   30 +
 .../iwork/iwana/IWork13PackageParser.java       |   86 +
 .../tika/parser/mail/MailContentHandler.java    |  110 +-
 .../microsoft/AbstractPOIFSExtractor.java       |   32 +-
 .../tika/parser/microsoft/HSLFExtractor.java    |   32 +-
 .../parser/microsoft/JackcessExtractor.java     |    4 +-
 .../parser/microsoft/MSOwnerFileParser.java     |   81 +
 .../tika/parser/microsoft/OfficeParser.java     |    2 +-
 .../tika/parser/microsoft/WordExtractor.java    |   22 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java |   12 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java |   58 +-
 .../ooxml/XSSFExcelExtractorDecorator.java      |   99 +-
 .../microsoft/xml/AbstractXML2003Parser.java    |  128 +
 .../parser/microsoft/xml/HyperlinkHandler.java  |   96 +
 .../microsoft/xml/SpreadsheetMLParser.java      |  175 ++
 .../tika/parser/microsoft/xml/WordMLParser.java |  306 +++
 .../parser/ner/grobid/GrobidNERecogniser.java   |   28 +-
 .../tika/parser/ocr/TesseractOCRParser.java     |   87 +-
 .../tika/parser/pdf/AbstractPDF2XHTML.java      |  578 ++++
 .../org/apache/tika/parser/pdf/OCR2XHTML.java   |  127 +
 .../org/apache/tika/parser/pdf/PDF2XHTML.java   |  518 +---
 .../org/apache/tika/parser/pdf/PDFParser.java   |    7 +
 .../apache/tika/parser/pdf/PDFParserConfig.java |  274 +-
 .../tika/parser/pkg/ZipContainerDetector.java   |   12 +
 .../tika/parser/rtf/RTFEmbObjHandler.java       |    7 +-
 .../tika/parser/rtf/RTFObjDataParser.java       |   43 +-
 .../apache/tika/parser/rtf/TextExtractor.java   |   11 +-
 .../services/org.apache.tika.parser.Parser      |    7 +-
 .../apache/tika/parser/pdf/PDFParser.properties |   10 +-
 .../tika/detect/TestContainerAwareDetector.java |   11 +
 .../org/apache/tika/mime/TestMimeTypes.java     |   38 +-
 .../parser/apple/AppleSingleFileParserTest.java |   46 +
 .../apache/tika/parser/dbf/DBFParserTest.java   |  158 ++
 .../apache/tika/parser/html/HtmlParserTest.java |   60 +-
 .../parser/image/xmp/JempboxExtractorTest.java  |   29 +-
 .../tika/parser/mail/RFC822ParserTest.java      |  115 +
 .../tika/parser/microsoft/ExcelParserTest.java  |   28 +-
 .../parser/microsoft/MSOwnerFileParserTest.java |   31 +
 .../microsoft/POIContainerExtractionTest.java   |    4 +-
 .../parser/microsoft/PowerPointParserTest.java  |   13 +-
 .../tika/parser/microsoft/WordParserTest.java   |   19 +
 .../ooxml/OOXMLContainerExtractionTest.java     |    2 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java |   43 +-
 .../parser/microsoft/xml/XML2003ParserTest.java |  109 +
 .../apache/tika/parser/pdf/PDFParserTest.java   |   74 +-
 .../apache/tika/parser/rtf/RTFParserTest.java   |  127 +-
 .../test-documents/testAppleSingleFile.pdf      |  Bin 0 -> 1893 bytes
 .../test/resources/test-documents/testDBF.dbf   |  Bin 0 -> 890 bytes
 .../test-documents/testDBF_gb18030.dbf          |  Bin 0 -> 144 bytes
 .../test/resources/test-documents/testDJVU.djvu |  Bin 0 -> 89 bytes
 .../resources/test-documents/testEXCEL2003.xml  |  100 +
 .../test-documents/testEXCEL_hyperlinks.xls     |  Bin 0 -> 29696 bytes
 .../test-documents/testEXCEL_hyperlinks.xlsx    |  Bin 0 -> 10038 bytes
 .../resources/test-documents/testEXCEL_poi.xlsx |  Bin 0 -> 3360 bytes
 .../test-documents/testEndNoteImportFile.enw    |   10 +
 .../test-documents/testExcel_embeddedPDF.xls    |  Bin 0 -> 38400 bytes
 .../test-documents/testExcel_embeddedPDF.xlsx   |  Bin 0 -> 25602 bytes
 .../resources/test-documents/testICalendar.ics  |   15 +
 .../test-documents/testKeynote2013.key          |  Bin 0 -> 274397 bytes
 .../resources/test-documents/testKeynoteNew.key |  Bin 274397 -> 0 bytes
 .../resources/test-documents/testMSOwnerFile    |  Bin 0 -> 162 bytes
 .../test-documents/testNumbers2013.numbers      |  Bin 0 -> 179147 bytes
 .../test-documents/testNumbersNew.numbers       |  Bin 179147 -> 0 bytes
 .../test-documents/testPPT_EmbeddedPDF.ppt      |  Bin 0 -> 187392 bytes
 .../test-documents/testPPT_EmbeddedPDF.pptx     |  Bin 0 -> 108637 bytes
 .../test-documents/testPages2013.pages          |  Bin 0 -> 237567 bytes
 .../resources/test-documents/testPagesNew.pages |  Bin 237567 -> 0 bytes
 .../test-documents/testRFC822_date_utf8         |    8 +
 .../resources/test-documents/testRFC822_eml     |   33 +
 .../resources/test-documents/testVCalendar.vcs  |   10 +
 .../resources/test-documents/testWORD2003.xml   | 2542 ++++++++++++++++++
 .../test-documents/testWindowsMediaMeta.asx     |    6 +
 .../test/resources/test-documents/testXMP.xmp   |  178 ++
 .../test-documents/test_recursive_embedded.doc  |  Bin 0 -> 31744 bytes
 tika-serialization/pom.xml                      |    2 +-
 tika-server/pom.xml                             |    2 +-
 tika-translate/pom.xml                          |    2 +-
 .../translate/translator.yandex.properties      |    2 +-
 tika-xmp/pom.xml                                |    2 +-
 114 files changed, 7822 insertions(+), 1203 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/fe559b80/tika-parsers/pom.xml
----------------------------------------------------------------------