You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by le...@apache.org on 2016/10/26 02:37:08 UTC
[3/7] tika git commit: Merge master into TIKA-1343
Merge master into TIKA-1343
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/fe559b80
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/fe559b80
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/fe559b80
Branch: refs/heads/master
Commit: fe559b80bcad1f107904ca7a89724a26ea2921a1
Parents: 4aff483 23a11ef
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Jul 1 13:35:52 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Jul 1 13:35:52 2016 -0700
----------------------------------------------------------------------
CHANGES.txt | 38 +-
pom.xml | 2 +-
tika-app/pom.xml | 2 +-
.../main/java/org/apache/tika/cli/TikaCLI.java | 9 +-
tika-batch/pom.xml | 2 +-
tika-bundle/pom.xml | 2 +-
tika-core/pom.xml | 2 +-
.../org/apache/tika/detect/NameDetector.java | 15 +-
.../tika/detect/ZeroSizeFileDetector.java | 45 +
.../java/org/apache/tika/io/EndianUtils.java | 829 +++---
.../tika/metadata/TikaCoreProperties.java | 7 +
.../java/org/apache/tika/mime/MediaType.java | 3 +
.../org/apache/tika/mime/MediaTypeRegistry.java | 2 +
.../org/apache/tika/mime/tika-mimetypes.xml | 69 +-
.../java/org/apache/tika/TikaDetectionTest.java | 2 +-
.../src/test/java/org/apache/tika/TikaTest.java | 6 +-
.../apache/tika/detect/NameDetectorTest.java | 10 +
.../tika/detect/ZeroSizeFileDetectorTest.java | 64 +
.../org/apache/tika/io/EndianUtilsTest.java | 35 +
tika-example/pom.xml | 2 +-
tika-java7/pom.xml | 2 +-
tika-langdetect/pom.xml | 3 +-
...apache.tika.language.detect.LanguageDetector | 15 +
tika-parent/pom.xml | 4 +-
tika-parsers/pom.xml | 4 +-
.../parser/apple/AppleSingleFileParser.java | 205 ++
.../org/apache/tika/parser/dbf/DBFCell.java | 147 +
.../apache/tika/parser/dbf/DBFColumnHeader.java | 97 +
.../apache/tika/parser/dbf/DBFFileHeader.java | 144 +
.../org/apache/tika/parser/dbf/DBFParser.java | 155 ++
.../org/apache/tika/parser/dbf/DBFReader.java | 207 ++
.../java/org/apache/tika/parser/dbf/DBFRow.java | 62 +
.../apache/tika/parser/geo/topic/GeoParser.java | 14 +-
.../tika/parser/html/HtmlEncodingDetector.java | 16 +-
.../apache/tika/parser/html/HtmlHandler.java | 3 +
.../tika/parser/image/xmp/JempboxExtractor.java | 30 +
.../iwork/iwana/IWork13PackageParser.java | 86 +
.../tika/parser/mail/MailContentHandler.java | 110 +-
.../microsoft/AbstractPOIFSExtractor.java | 32 +-
.../tika/parser/microsoft/HSLFExtractor.java | 32 +-
.../parser/microsoft/JackcessExtractor.java | 4 +-
.../parser/microsoft/MSOwnerFileParser.java | 81 +
.../tika/parser/microsoft/OfficeParser.java | 2 +-
.../tika/parser/microsoft/WordExtractor.java | 22 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 12 +-
.../ooxml/XSLFPowerPointExtractorDecorator.java | 58 +-
.../ooxml/XSSFExcelExtractorDecorator.java | 99 +-
.../microsoft/xml/AbstractXML2003Parser.java | 128 +
.../parser/microsoft/xml/HyperlinkHandler.java | 96 +
.../microsoft/xml/SpreadsheetMLParser.java | 175 ++
.../tika/parser/microsoft/xml/WordMLParser.java | 306 +++
.../parser/ner/grobid/GrobidNERecogniser.java | 28 +-
.../tika/parser/ocr/TesseractOCRParser.java | 87 +-
.../tika/parser/pdf/AbstractPDF2XHTML.java | 578 ++++
.../org/apache/tika/parser/pdf/OCR2XHTML.java | 127 +
.../org/apache/tika/parser/pdf/PDF2XHTML.java | 518 +---
.../org/apache/tika/parser/pdf/PDFParser.java | 7 +
.../apache/tika/parser/pdf/PDFParserConfig.java | 274 +-
.../tika/parser/pkg/ZipContainerDetector.java | 12 +
.../tika/parser/rtf/RTFEmbObjHandler.java | 7 +-
.../tika/parser/rtf/RTFObjDataParser.java | 43 +-
.../apache/tika/parser/rtf/TextExtractor.java | 11 +-
.../services/org.apache.tika.parser.Parser | 7 +-
.../apache/tika/parser/pdf/PDFParser.properties | 10 +-
.../tika/detect/TestContainerAwareDetector.java | 11 +
.../org/apache/tika/mime/TestMimeTypes.java | 38 +-
.../parser/apple/AppleSingleFileParserTest.java | 46 +
.../apache/tika/parser/dbf/DBFParserTest.java | 158 ++
.../apache/tika/parser/html/HtmlParserTest.java | 60 +-
.../parser/image/xmp/JempboxExtractorTest.java | 29 +-
.../tika/parser/mail/RFC822ParserTest.java | 115 +
.../tika/parser/microsoft/ExcelParserTest.java | 28 +-
.../parser/microsoft/MSOwnerFileParserTest.java | 31 +
.../microsoft/POIContainerExtractionTest.java | 4 +-
.../parser/microsoft/PowerPointParserTest.java | 13 +-
.../tika/parser/microsoft/WordParserTest.java | 19 +
.../ooxml/OOXMLContainerExtractionTest.java | 2 +-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 43 +-
.../parser/microsoft/xml/XML2003ParserTest.java | 109 +
.../apache/tika/parser/pdf/PDFParserTest.java | 74 +-
.../apache/tika/parser/rtf/RTFParserTest.java | 127 +-
.../test-documents/testAppleSingleFile.pdf | Bin 0 -> 1893 bytes
.../test/resources/test-documents/testDBF.dbf | Bin 0 -> 890 bytes
.../test-documents/testDBF_gb18030.dbf | Bin 0 -> 144 bytes
.../test/resources/test-documents/testDJVU.djvu | Bin 0 -> 89 bytes
.../resources/test-documents/testEXCEL2003.xml | 100 +
.../test-documents/testEXCEL_hyperlinks.xls | Bin 0 -> 29696 bytes
.../test-documents/testEXCEL_hyperlinks.xlsx | Bin 0 -> 10038 bytes
.../resources/test-documents/testEXCEL_poi.xlsx | Bin 0 -> 3360 bytes
.../test-documents/testEndNoteImportFile.enw | 10 +
.../test-documents/testExcel_embeddedPDF.xls | Bin 0 -> 38400 bytes
.../test-documents/testExcel_embeddedPDF.xlsx | Bin 0 -> 25602 bytes
.../resources/test-documents/testICalendar.ics | 15 +
.../test-documents/testKeynote2013.key | Bin 0 -> 274397 bytes
.../resources/test-documents/testKeynoteNew.key | Bin 274397 -> 0 bytes
.../resources/test-documents/testMSOwnerFile | Bin 0 -> 162 bytes
.../test-documents/testNumbers2013.numbers | Bin 0 -> 179147 bytes
.../test-documents/testNumbersNew.numbers | Bin 179147 -> 0 bytes
.../test-documents/testPPT_EmbeddedPDF.ppt | Bin 0 -> 187392 bytes
.../test-documents/testPPT_EmbeddedPDF.pptx | Bin 0 -> 108637 bytes
.../test-documents/testPages2013.pages | Bin 0 -> 237567 bytes
.../resources/test-documents/testPagesNew.pages | Bin 237567 -> 0 bytes
.../test-documents/testRFC822_date_utf8 | 8 +
.../resources/test-documents/testRFC822_eml | 33 +
.../resources/test-documents/testVCalendar.vcs | 10 +
.../resources/test-documents/testWORD2003.xml | 2542 ++++++++++++++++++
.../test-documents/testWindowsMediaMeta.asx | 6 +
.../test/resources/test-documents/testXMP.xmp | 178 ++
.../test-documents/test_recursive_embedded.doc | Bin 0 -> 31744 bytes
tika-serialization/pom.xml | 2 +-
tika-server/pom.xml | 2 +-
tika-translate/pom.xml | 2 +-
.../translate/translator.yandex.properties | 2 +-
tika-xmp/pom.xml | 2 +-
114 files changed, 7822 insertions(+), 1203 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/fe559b80/tika-parsers/pom.xml
----------------------------------------------------------------------