You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/12/02 17:00:27 UTC
[tika] branch main updated: TIKA-3241 -- fix git add problems,
replace some test documents that were modified in the move, update poms
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new a43784b TIKA-3241 -- fix git add problems, replace some test documents that were modified in the move, update poms
a43784b is described below
commit a43784b19f6b0955478dded71521b0491d21c90b
Author: tallison <ta...@apache.org>
AuthorDate: Wed Dec 2 11:58:44 2020 -0500
TIKA-3241 -- fix git add problems, replace some test documents that were modified in the move, update poms
---
.../tika-parser-apple-module/pom.xml | 2 +-
.../tika/parser/apple/AppleSingleFileParser.java | 192 +
.../apache/tika/parser/apple/BPListDetector.java | 138 +
.../tika/parser/iwork/IWorkPackageParser.java | 220 +
.../tika/parser/iwork/KeynoteContentHandler.java | 176 +
.../tika/parser/iwork/NumbersContentHandler.java | 232 +
.../services/org.apache.tika.detect.Detector | 16 +
.../services/org.apache.tika.parser.Parser | 22 +
.../tika/parser/iwork/AutoPageNumberUtilsTest.java | 79 +
.../apache/tika/parser/iwork/IWorkParserTest.java | 392 +
.../test-documents/testAppleSingleFile.pdf | Bin 0 -> 1893 bytes
.../resources/test-documents/testKeynote2013.key | Bin 0 -> 274397 bytes
.../test-documents/testMasterSlideTable.key | Bin 0 -> 220184 bytes
.../resources/test-documents/testNumbers.numbers | Bin 0 -> 134571 bytes
.../test-documents/testNumbers2013.numbers | Bin 0 -> 179147 bytes
.../resources/test-documents/testPages2013.pages | Bin 0 -> 237567 bytes
.../test-documents/testPagesComments.pages | Bin 0 -> 154546 bytes
.../testPagesHeadersFootersFootnotes.pages | Bin 0 -> 177328 bytes
.../test-documents/testPagesPwdProtected.pages | Bin 0 -> 33166 bytes
.../test-documents/testWEBARCHIVE.webarchive | 646 +
.../tika-parser-audiovideo-module/pom.xml | 3 +-
.../org/apache/tika/parser/audio/MidiParser.java | 122 +
.../tika/parser/mp3/CompositeTagHandler.java | 142 +
.../java/org/apache/tika/parser/mp3/ID3Tags.java | 254 +
.../org/apache/tika/parser/mp3/ID3v1Handler.java | 183 +
.../org/apache/tika/parser/mp3/ID3v22Handler.java | 159 +
.../org/apache/tika/parser/mp3/ID3v23Handler.java | 138 +
.../org/apache/tika/parser/mp3/ID3v2Frame.java | 430 +
.../org/apache/tika/parser/mp3/LyricsHandler.java | 156 +
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 256 +
.../services/org.apache.tika.parser.Parser | 22 +
.../apache/tika/parser/audio/AudioParserTest.java | 75 +
.../apache/tika/parser/audio/MidiParserTest.java | 42 +
.../org/apache/tika/parser/mp3/Mp3ParserTest.java | 368 +
.../org/apache/tika/parser/mp3/MpegStreamTest.java | 166 +
.../src/test/resources/test-documents/test2.mp3 | Bin 0 -> 2668637 bytes
.../src/test/resources/test-documents/testAIFF.aif | Bin 0 -> 3894 bytes
.../src/test/resources/test-documents/testAU.au | Bin 0 -> 3868 bytes
.../src/test/resources/test-documents/testFLV.flv | Bin 0 -> 90580 bytes
.../test/resources/test-documents/testMP3i18n.mp3 | Bin 0 -> 40832 bytes
.../resources/test-documents/testMP3id3v1_v2.mp3 | Bin 0 -> 40960 bytes
.../test/resources/test-documents/testMP3id3v2.mp3 | Bin 0 -> 39577 bytes
.../test/resources/test-documents/testMP3noid3.mp3 | Bin 0 -> 39288 bytes
.../resources/test-documents/testMP3truncated.mp3 | Bin 0 -> 65536 bytes
.../resources/test-documents/testMP4_truncated.m4a | Bin 0 -> 74 bytes
.../pom.xml | 15 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 275 +
.../test/resources/test-documents/testDWG2010.dwg | Bin 0 -> 59562 bytes
.../tika-parser-code-module/pom.xml | 3 +-
.../org/apache/tika/parser/asm/ClassParser.java | 54 +
.../java/org/apache/tika/parser/mat/MatParser.java | 146 +
.../services/org.apache.tika.parser.Parser | 21 +
.../test-documents/AutoDetectParser.class | Bin 0 -> 3794 bytes
.../breidamerkurjokull_radar_profiles_2009.mat | Bin 27611304 -> 14748772 bytes
.../test-documents/test-columnar.sas7bdat | Bin 0 -> 131072 bytes
.../src/test/resources/test-documents/testC.c | 6 +
.../test/resources/test-documents/testJS_HTML.js | 91 +
.../resources/test-documents/testLinux-mips-32be | Bin 0 -> 8125 bytes
.../resources/test-documents/testLinux-mips-32le | Bin 0 -> 38051 bytes
.../resources/test-documents/testLinux-ppc-32be | Bin 0 -> 248480 bytes
.../test/resources/test-documents/testLinux-x86-32 | Bin 0 -> 7175 bytes
.../src/test/resources/test-documents/testMATLAB.m | 4 +
.../resources/test-documents/testMATLAB_barcast.m | 383 +
.../resources/test-documents/testMATLAB_wtsgaus.m | 52 +
.../src/test/resources/test-documents/testSAS.sas | 33 +
.../test/resources/test-documents/testSAS.sas7bdat | Bin 0 -> 17408 bytes
.../pom.xml | 20 +-
.../org/apache/tika/parser/crypto/Pkcs7Parser.java | 90 +
.../test/resources/test-documents/Test1.txt.tsd | Bin 0 -> 4967 bytes
.../test/resources/test-documents/Test2.txt.tsd | Bin 0 -> 4969 bytes
.../src/test/resources/test-documents/testCERT.der | Bin
.../src/test/resources/test-documents/testCERT.pem | 0
.../test/resources/test-documents/testDSAKEY.der | Bin
.../test/resources/test-documents/testDSAKEY.pem | 0
.../resources/test-documents/testDSAPARAMS.pem | 0
.../test/resources/test-documents/testDetached.p7s | Bin 0 -> 2941 bytes
.../test/resources/test-documents/testECKEY.der | Bin
.../test/resources/test-documents/testECKEY.pem | 0
.../test/resources/test-documents/testECPARAMS.pem | 0
.../test/resources/test-documents/testRSAKEY.der | Bin
.../test/resources/test-documents/testRSAKEY.pem | 0
.../test-documents/testTSD_broken_pdf.tsd | Bin 0 -> 91985 bytes
.../pom.xml | 26 +-
.../tika/parser/digestutils/CommonsDigester.java | 186 +
.../pom.xml | 12 +-
.../apache/tika/parser/font/TrueTypeParser.java | 117 +
.../services/org.apache.tika.parser.Parser | 18 +
.../tika-parser-html-module/pom.xml | 3 +-
.../tika/parser/html/BoilerpipeContentHandler.java | 363 +
.../org/apache/tika/parser/html/DataURIScheme.java | 77 +
.../apache/tika/parser/html/DataURISchemeUtil.java | 103 +
.../tika/parser/html/HtmlEncodingDetector.java | 188 +
.../org/apache/tika/parser/html/HtmlHandler.java | 462 +
.../org/apache/tika/parser/html/HtmlMapper.java | 69 +
.../org/apache/tika/parser/html/HtmlParser.java | 247 +
.../html/charsetdetector/CharsetAliases.java | 145 +
.../charsetdetector/CharsetDetectionResult.java | 62 +
.../parser/html/charsetdetector/PreScanner.java | 270 +
.../charsets/ReplacementCharset.java | 65 +
.../org.apache.tika.detect.EncodingDetector | 15 +
.../html/StandardCharsets_unsupported_by_IANA.txt | 139 +
.../tika/parser/html/DataURISchemeParserTest.java | 77 +
.../html/StandardHtmlEncodingDetectorTest.java | 378 +
.../org/apache/tika/parser/html/tika-config.xml | 30 +
.../resources/test-documents/big-preamble.html | 827 ++
.../test-documents/boilerplate-whitespace.html | 27 +
.../test/resources/test-documents/testHTML.html | 28 +
.../test-documents/testHTMLBadScript.html | 9 +
.../test-documents/testHTMLGoodScript.html | 9 +
...ing_3.html => testHTMLNoisyMetaEncoding_1.html} | 10 +-
...ing_3.html => testHTMLNoisyMetaEncoding_2.html} | 10 +-
.../testHTMLNoisyMetaEncoding_3.html | 8 +-
...ing_3.html => testHTMLNoisyMetaEncoding_4.html} | 8 +-
.../test-documents/testHTML_charset_utf16le.html | Bin 0 -> 380 bytes
.../test-documents/testHTML_charset_utf8.html | 4 +-
.../testHTML_embedded_data_uri_js.html | 11 +
.../test-documents/testHTML_embedded_img.html | 352 +
.../resources/test-documents/testHTML_utf8.html | 25 +
.../src/test/resources/test-documents/tika434.html | 914 ++
.../pom.xml | 59 +-
.../org/apache/tika/parser/image/HeifParser.java | 62 +
.../org/apache/tika/parser/image/ICNSParser.java | 128 +
.../tika/parser/image/ImageMetadataExtractor.java | 627 +
.../org/apache/tika/parser/image/PSDParser.java | 259 +
.../apache/tika/parser/image/ImageParserTest.java | 174 +
.../apache/tika/parser/image/JpegParserTest.java | 286 +
.../apache/tika/parser/image/PSDParserTest.java | 73 +
.../src/test/resources/test-documents/testBPG.bpg | Bin 0 -> 1824 bytes
.../testBPG_commented_xnviewmp026.bpg | Bin 0 -> 12374 bytes
.../src/test/resources/test-documents/testGIF.gif | Bin 0 -> 8495 bytes
.../test/resources/test-documents/testHEIF.heic | Bin 0 -> 13706 bytes
.../resources/test-documents/testICNS_basic.icns | Bin 0 -> 18199 bytes
.../test/resources/test-documents/testJBIG2.jb2 | Bin 0 -> 346 bytes
.../src/test/resources/test-documents/testJPEG.jp2 | Bin 0 -> 25725 bytes
.../src/test/resources/test-documents/testJPEG.jpg | Bin 0 -> 7686 bytes
.../resources/test-documents/testJPEG_EXIF.jpg | Bin 0 -> 16357 bytes
.../test-documents/testJPEG_oddTagComponent.jpg | Bin 0 -> 8330 bytes
.../src/test/resources/test-documents/testPNG.png | Bin 0 -> 17041 bytes
.../src/test/resources/test-documents/testTIFF.tif | Bin 0 -> 25584 bytes
.../test/resources/test-documents/testWEBP.webp | Bin 0 -> 3442 bytes
.../test-documents/testWebp_Alpha_Lossless.webp | Bin 0 -> 92312 bytes
.../test-documents/testWebp_Alpha_Lossy.webp | Bin 0 -> 23404 bytes
.../tika-parser-integration-tests/pom.xml | 331 +
.../apache/tika/config/TikaParserConfigTest.java | 157 +
.../tika/config/TikaTranslatorConfigTest.java | 72 +
.../tika/detect/TestContainerAwareDetector.java | 572 +
.../tika/extractor/EmbeddedDocumentUtilTest.java | 43 +
.../java/org/apache/tika/mime/TestMimeTypes.java | 1336 ++
.../tika/parser/AutoDetectReaderParserTest.java | 102 +
.../parser/BouncyCastleDigestingParserTest.java | 268 +
.../tika/parser/RecursiveParserWrapperTest.java | 456 +
.../apache/tika/parser/TestXMLEntityExpansion.java | 151 +
.../parser/apple/AppleSingleFileParserTest.java | 47 +
.../apache/tika/parser/apple/PListParserTest.java | 40 +
.../apache/tika/parser/html/HtmlParserTest.java | 66 +
.../tika/parser/microsoft/EMFParserTest.java | 52 +
.../tika/parser/microsoft/ExcelParserTest.java | 32 +
.../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 49 +
.../tika/parser/ocr/TesseractOCRParserTest.java | 322 +
.../apache/tika/parser/pkg/CompressParserTest.java | 79 +
.../org/apache/tika/parser/pkg/GzipParserTest.java | 78 +
.../org/apache/tika/parser/pkg/TarParserTest.java | 66 +
.../org/apache/tika/parser/pkg/ZlibParserTest.java | 46 +
.../tika/parser/xml/FictionBookParserTest.java | 47 +
.../sax/StandardsExtractingContentHandlerTest.java | 53 +
.../apache/tika/utils/ServiceLoaderUtilsTest.java | 57 +
.../tika/config/TIKA-1702-translator-default.xml | 24 +
.../config/TIKA-1702-translator-empty-default.xml | 22 +
.../tika/config/TIKA-1702-translator-empty.xml | 20 +
.../tika/config/TIKA-1708-detector-composite.xml | 25 +
...TIKA-2273-exclude-encoding-detector-default.xml | 29 +
.../TIKA-2273-parameterize-encoding-detector.xml | 30 +
.../org/apache/tika/parser/TIKA-3137-include.xml | 38 +
.../CVLKRA-KYC_Download_File_Structure_V3.1.xlsx | Bin 0 -> 204480 bytes
.../test/resources/test-documents/NUTCH-1997.cbor | 30 +
.../active_layer_arcss_grid_barrow_alaska_2012.dif | 61 +
.../test-documents/mock/null_pointer_no_msg.xml} | 18 +-
.../resources/test-documents/mock/real_oom.xml} | 17 +-
.../resources/test-documents/mock/system_exit.xml} | 18 +-
.../test-documents/mock/thread_interrupt.xml} | 18 +-
.../resources/test-documents/test-columnar.ods | Bin 0 -> 12854 bytes
.../resources/test-documents/test-columnar.xpt | Bin 0 -> 4720 bytes
.../src/test/resources/test-documents/test1.swf | Bin 0 -> 21054 bytes
.../test/resources/test-documents/testAMR-WB.amr | Bin 0 -> 3609 bytes
.../src/test/resources/test-documents/testAPK.apk | Bin 0 -> 11740 bytes
.../resources/test-documents/testBDB_btree_2.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_3.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_4.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_5.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_hash_2.db | Bin 0 -> 12288 bytes
.../src/test/resources/test-documents/testCERT.pem | 0
.../src/test/resources/test-documents/testCSS.css | 48 +
.../test/resources/test-documents/testDITA.dita | 34 +
.../test/resources/test-documents/testDITA2.dita | 33 +
.../test/resources/test-documents/testDSAKEY.der | Bin
.../test/resources/test-documents/testDSAKEY.pem | 0
.../src/test/resources/test-documents/testEAC3.ac3 | Bin 0 -> 768 bytes
.../src/test/resources/test-documents/testEAR.ear | Bin 0 -> 1086 bytes
.../test/resources/test-documents/testECKEY.pem | 0
.../test/resources/test-documents/testECPARAMS.pem | 0
.../src/test/resources/test-documents/testFLAC.oga | Bin 0 -> 10820 bytes
.../resources/test-documents/testGRAPHVIZd.dot | 6 +
.../resources/test-documents/testGRAPHVIZdc.dot | 9 +
.../src/test/resources/test-documents/testHFA.hfa | Bin 0 -> 1024 bytes
.../resources/test-documents/testICalendar.ics | 15 +
.../test/resources/test-documents/testINDD.indd | Bin 0 -> 880640 bytes
.../test-documents/testJAVAPROPS.properties | 22 +
.../resources/test-documents/testJavaHprofBinary | Bin 0 -> 88489 bytes
.../resources/test-documents/testJavaHprofText | 2193 ++++
.../test-documents/testLotus123-lotusftp.wk4 | Bin 0 -> 6168 bytes
.../test/resources/test-documents/testLotus123.wk1 | Bin 0 -> 24291 bytes
.../test/resources/test-documents/testLotusEml.eml | 71 +
.../test-documents/testMHTMLFirefox.mhtml | 455 +
.../src/test/resources/test-documents/testMKV.mkv | Bin 0 -> 82969 bytes
.../test/resources/test-documents/testMYSQL.MYI | Bin 0 -> 1024 bytes
.../test-documents/testOptionalHyphen.doc | Bin 0 -> 22016 bytes
.../test-documents/testOptionalHyphen.ppt | Bin 0 -> 100864 bytes
.../test-documents/testOptionalHyphen.pptx | Bin 0 -> 33173 bytes
.../test-documents/testOptionalHyphen.rtf | 158 +
.../src/test/resources/test-documents/testPICT.pct | Bin 0 -> 23454 bytes
.../test-documents/testPKCS17Sig-v4.xml.p7m | 1606 +++
.../resources/test-documents/testPKCS17Sig.xml.p7m | 4333 +++++++
.../test-documents/testPhoneNumberExtractor.odt | Bin 0 -> 15244 bytes
.../src/test/resources/test-documents/testRDF.rdf | 23 +
.../resources/test-documents/testSolaris-x86-32 | Bin 0 -> 6404 bytes
.../test-documents/testStarOffice-6.0-calc.sxc | Bin 0 -> 7406 bytes
.../test-documents/testStarOffice-6.0-writer.sxw | Bin 0 -> 5200 bytes
.../test/resources/test-documents/testStataDTA.dta | Bin 0 -> 1207 bytes
.../test/resources/test-documents/testStataDTA.txt | 15 +
.../resources/test-documents/testTAR_no_magic.tar | Bin 0 -> 156160 bytes
.../test-documents/testTXTNonASCIIUTF8.txt | 7 +
.../test-documents/testThunderbirdEml.eml | 32 +
.../test/resources/test-documents/testVORBIS.ogg | Bin 0 -> 4241 bytes
.../test-documents/testVORDrawTemplate.vor | Bin 0 -> 29696 bytes
.../test-documents/testVORWriterTemplate.vor | Bin 0 -> 8192 bytes
.../test/resources/test-documents/testWMV_WMV2.wmv | Bin 0 -> 554297 bytes
.../test/resources/test-documents/testWORKS.wps | Bin 0 -> 9728 bytes
.../resources/test-documents/testWORKS2000.wps | Bin 0 -> 5120 bytes
.../test-documents/testWORKSWordProcessor3.0.wps | Bin 0 -> 3072 bytes
.../resources/test-documents/testWebVTT_simple.vtt | 10 +
.../test-documents/testWindowsMediaMeta.asx | 6 +
.../src/test/resources/test-documents/testXDP.xdp | 5 +
.../test/resources/test-documents/testXFDF.xfdf | 7 +
.../tika-parser-jdbc-commons/pom.xml | 3 +-
.../tika-parser-mail-commons/pom.xml | 3 +-
.../apache/tika/parser/mailcommons/MailUtil.java | 116 +
.../tika/parser/mailcommons/MailUtilTest.java | 56 +
.../pom.xml | 45 +-
.../tika/parser/mail/MailContentHandler.java | 657 +
.../org/apache/tika/parser/mail/RFC822Parser.java | 133 +
.../apache/tika/parser/mail/RFC822ParserTest.java | 603 +
.../mail/tika-config-extract-all-alternatives.xml | 30 +
.../test/resources/test-documents/multiline.mbox | 5 +
.../src/test/resources/test-documents/quoted.mbox | 4 +
.../src/test/resources/test-documents/simple.mbox | 7 +
.../test-documents/testEmailWithPNGAtt.eml | 354 +
.../resources/test-documents/testGroupWiseEml.eml | 58 +
.../src/test/resources/test-documents/testRFC822 | 41 +
.../resources/test-documents/testRFC822-txt-body | 35 +
.../resources/test-documents/testRFC822_base64 | 8 +
.../resources/test-documents/testRFC822_date_utf8 | 8 +
.../test-documents/testRFC822_encrypted_zip | 61 +
.../test-documents/testRFC822_i18nheaders | 9 +
.../resources/test-documents/testRFC822_oddfrom | 2105 +++
.../resources/test-documents/testRFC822_quoted | 13 +
.../tika-parser-microsoft-module/pom.xml | 3 +-
.../parser/microsoft/MSOwnerFileParserTest.java | 31 +
.../tika/parser/microsoft/OfficeParserTest.java | 46 +
.../tika/parser/microsoft/OldExcelParserTest.java | 122 +
.../parser/microsoft/SolidworksParserTest.java | 189 +
.../tika/parser/microsoft/VisioParserTest.java | 51 +
.../parser/microsoft/chm/TestChmBlockInfo.java | 116 +
.../parser/microsoft/chm/TestChmExtractor.java | 76 +
.../parser/microsoft/chm/TestChmItsfHeader.java | 119 +
.../tika/parser/microsoft/chm/TestChmLzxState.java | 95 +
.../microsoft/chm/TestChmLzxcControlData.java | 139 +
.../microsoft/chm/TestDirectoryListingEntry.java | 85 +
.../tika/parser/microsoft/chm/TestPmgiHeader.java | 44 +
.../parser/microsoft/ooxml/OOXMLParserTest.java | 1771 +++
.../parser/microsoft/ooxml/SXSLFExtractorTest.java | 632 +
.../parser/microsoft/ooxml/xps/XPSParserTest.java | 97 +
.../ooxml/xwpf/ml2006/Word2006MLParserTest.java | 202 +
.../tika/parser/microsoft/rtf/RTFParserTest.java | 501 +
.../parser/microsoft/xml/XML2003ParserTest.java | 93 +
.../microsoft/ooxml/tika-config-sax-macros.xml | 34 +
.../tika/parser/microsoft/rtf/tika-config.xml | 26 +
.../tika-config-extract-all-alternatives-msg.xml | 30 +
.../tika/parser/microsoft/tika-config-macros.xml | 32 +
.../resources/test-documents/EmbeddedDocument.docx | Bin 0 -> 13219 bytes
.../resources/test-documents/EmbeddedOutlook.docx | Bin 0 -> 113242 bytes
.../test/resources/test-documents/NullHeader.docx | Bin 0 -> 4355 bytes
.../test/resources/test-documents/chm/IMJPCLE.CHM | Bin 0 -> 256718 bytes
.../test/resources/test-documents/chm/admin.chm | Bin 0 -> 49749 bytes
.../test/resources/test-documents/chm/cmak_ops.CHM | Bin 0 -> 82895 bytes
.../resources/test-documents/chm/wmicontrol.CHM | Bin 0 -> 32096 bytes
.../test/resources/test-documents/headerPic.docx | Bin 0 -> 16206 bytes
.../src/test/resources/test-documents/pictures.ppt | Bin 0 -> 75776 bytes
.../src/test/resources/test-documents/protect.xlsx | Bin 0 -> 12968 bytes
.../resources/test-documents/protectedFile.xlsx | Bin 0 -> 12968 bytes
.../resources/test-documents/test-columnar.xlsb | Bin 0 -> 9691 bytes
.../src/test/resources/test-documents/test.doc | Bin 0 -> 9216 bytes
.../resources/test-documents/testAccess_V1997.mdb | Bin 0 -> 118784 bytes
.../test-documents/testBinControlWord.rtf | 2 +
.../src/test/resources/test-documents/testChm.chm | Bin 0 -> 186259 bytes
.../src/test/resources/test-documents/testChm3.chm | Bin 0 -> 900481 bytes
.../test/resources/test-documents/testComment.ppt | Bin 0 -> 101376 bytes
.../test/resources/test-documents/testComment.pptx | Bin 0 -> 34979 bytes
.../test/resources/test-documents/testDOTM.dotm | Bin 0 -> 65527 bytes
.../resources/test-documents/testDocumentLink.doc | Bin 0 -> 812032 bytes
.../resources/test-documents/testEXCEL-charts.xls | Bin 0 -> 15360 bytes
.../resources/test-documents/testEXCEL.strict.xlsx | Bin 0 -> 10006 bytes
.../test/resources/test-documents/testEXCEL_4.xls | Bin 0 -> 39942 bytes
.../testEXCEL_WORKBOOK_in_capitals.xls | Bin 0 -> 64512 bytes
.../test-documents/testEXCEL_big_numbers.xls | Bin 0 -> 26112 bytes
.../test-documents/testEXCEL_custom_props.xlsx | Bin 0 -> 9230 bytes
.../test-documents/testEXCEL_dateFormats.xlsx | Bin 0 -> 8766 bytes
.../test-documents/testEXCEL_diagramData.xlsx | Bin 0 -> 16654 bytes
.../test-documents/testEXCEL_embeddedPDF_mac.xlsx | Bin 0 -> 80578 bytes
.../testEXCEL_embeddedPDF_windows.xls | Bin 0 -> 61952 bytes
.../resources/test-documents/testEXCEL_embeded.xls | Bin 0 -> 303104 bytes
.../test-documents/testEXCEL_headers_footers.xls | Bin 0 -> 33792 bytes
.../test-documents/testEXCEL_hyperlinks.xls | Bin 0 -> 29696 bytes
.../testEXCEL_labels-govdocs-515858.xls | Bin 0 -> 57856 bytes
.../testEXCEL_macro_enabled_template.xltm | Bin 0 -> 8619 bytes
.../test-documents/testEXCEL_poi-61034.xlsx | Bin 0 -> 32774 bytes
.../testEXCEL_protected_passtika.xls | Bin 0 -> 17408 bytes
.../testEXCEL_protected_passtika.xlsx | Bin 0 -> 12800 bytes
.../testEXCEL_protected_passtika_2.xlsx | Bin 0 -> 15872 bytes
.../test-documents/testEXCEL_template.xltx | Bin 0 -> 8589 bytes
.../test-documents/testExcel_embeddedPDF.xlsx | Bin 0 -> 25602 bytes
.../resources/test-documents/testException1.doc | Bin 0 -> 49152 bytes
.../test-documents/testMSChart-govdocs-428996.xls | Bin 0 -> 35328 bytes
.../test-documents/testMSChart-govdocs-428996.xlsx | Bin 0 -> 17112 bytes
.../src/test/resources/test-documents/testMSG.msg | Bin 0 -> 20480 bytes
.../test-documents/testMSG_Appointment.msg | Bin 0 -> 30208 bytes
.../test/resources/test-documents/testMSG_Post.msg | Bin 0 -> 21504 bytes
.../resources/test-documents/testMSG_chinese.msg | Bin 0 -> 48129 bytes
.../resources/test-documents/testMSG_forwarded.msg | Bin 0 -> 25600 bytes
.../test/resources/test-documents/testMSOwnerFile | Bin 0 -> 162 bytes
.../test/resources/test-documents/testOneNote.one | Bin 0 -> 30288 bytes
.../test/resources/test-documents/testOneNote1.one | Bin 0 -> 360280 bytes
.../test-documents/testOneNote2007OrEarlier1.one | Bin 0 -> 1246998 bytes
.../test/resources/test-documents/testOneNote3.one | Bin 0 -> 35344 bytes
.../src/test/resources/test-documents/testPPM.ppm | 4 +
.../src/test/resources/test-documents/testPPT.potm | Bin 0 -> 40102 bytes
.../src/test/resources/test-documents/testPPT.ppsm | Bin 0 -> 36545 bytes
.../src/test/resources/test-documents/testPPT.ppsx | Bin 0 -> 36521 bytes
.../src/test/resources/test-documents/testPPT.thmx | Bin 0 -> 42485 bytes
.../testPPTX_overlappingRelations.pptx | Bin 0 -> 38135 bytes
.../resources/test-documents/testPPT_2imgs.ppt | Bin 0 -> 124928 bytes
.../resources/test-documents/testPPT_2imgs.pptx | Bin 0 -> 59246 bytes
.../test-documents/testPPT_EmbeddedPDF.ppt | Bin 0 -> 187392 bytes
.../test-documents/testPPT_EmbeddedPDF.pptx | Bin 0 -> 108637 bytes
.../resources/test-documents/testPPT_comment.pptx | Bin 0 -> 30939 bytes
.../test-documents/testPPT_custom_props.ppt | Bin 0 -> 104960 bytes
.../test-documents/testPPT_diagramData.pptx | Bin 0 -> 48793 bytes
.../resources/test-documents/testPPT_embedded2.ppt | Bin 0 -> 92160 bytes
.../test-documents/testPPT_embeddedMP3.pptx | Bin 0 -> 84434 bytes
.../testPPT_embedded_two_slides.pptx | Bin 0 -> 255364 bytes
.../resources/test-documents/testPPT_embeded.ppt | Bin 0 -> 224768 bytes
.../resources/test-documents/testPPT_embeded.pptx | Bin 0 -> 202969 bytes
.../resources/test-documents/testPPT_groups.ppt | Bin 0 -> 161792 bytes
.../resources/test-documents/testPPT_macros.ppt | Bin 0 -> 88064 bytes
.../test-documents/testPPT_masterFooter.pptx | Bin 0 -> 35128 bytes
.../test-documents/testPPT_masterText.pptx | Bin 0 -> 32270 bytes
.../test-documents/testPPT_masterText2.ppt | Bin 0 -> 102912 bytes
.../test-documents/testPPT_masterText2.pptx | Bin 0 -> 32291 bytes
.../test-documents/testPPT_oleWorkbook.ppt | Bin 0 -> 98304 bytes
.../test-documents/testPPT_oleWorkbook.pptx | Bin 0 -> 44001 bytes
.../test-documents/testPPT_protected_passtika.pptx | Bin 0 -> 41472 bytes
.../resources/test-documents/testPPT_various.ppt | Bin 0 -> 160768 bytes
.../resources/test-documents/testPPT_various.pptx | Bin 0 -> 56659 bytes
.../resources/test-documents/testPROJECT2007.mpp | Bin 0 -> 147968 bytes
.../test-documents/testPST_variousBodyTypes.pst | Bin 0 -> 271360 bytes
.../resources/test-documents/testPUBLISHER.pub | Bin 0 -> 65536 bytes
.../resources/test-documents/testRTFBoldItalic.rtf | 164 +
.../resources/test-documents/testRTFBoldPlain.rtf | 17 +
.../test-documents/testRTFEmbeddedFiles.rtf | 6856 ++++++++++
.../resources/test-documents/testRTFHyperlink.rtf | 598 +
.../test-documents/testRTFIgnoredControlWord.rtf | 17 +
.../test-documents/testRTFInvalidUnicode.rtf | 11 +
.../test-documents/testRTFListMicrosoftWord.rtf | 227 +
.../resources/test-documents/testRTFTIKA_2883.rtf | Bin 0 -> 1526 bytes
.../test-documents/testRTFTableCellSeparation2.rtf | 3 +
...stRTFUnicodeUCNControlWordCharacterDoubling.rtf | 8 +
.../testRTFWord2010CzechCharacters.rtf | 190 +
.../testRTFWordPadCzechCharacters.rtf | 5 +
.../test/resources/test-documents/testVISIO.vsdm | Bin 0 -> 32360 bytes
.../test/resources/test-documents/testVISIO.vssm | Bin 0 -> 32358 bytes
.../test/resources/test-documents/testVISIO.vssx | Bin 0 -> 32349 bytes
.../test/resources/test-documents/testVISIO.vstx | Bin 0 -> 32350 bytes
.../test/resources/test-documents/testWINMAIL.dat | Bin 0 -> 66276 bytes
.../src/test/resources/test-documents/testWMF.wmf | Bin 0 -> 51590 bytes
.../resources/test-documents/testWMF_charset.wmf | Bin 0 -> 9316 bytes
.../test/resources/test-documents/testWORD2003.xml | 2542 ++++
.../resources/test-documents/testWORD_1img.docx | Bin 0 -> 8325 bytes
.../resources/test-documents/testWORD_3imgs.doc | Bin 0 -> 36352 bytes
.../test-documents/testWORD_boldHyperlink.docx | Bin 0 -> 12382 bytes
.../resources/test-documents/testWORD_charts.docx | Bin 0 -> 15586 bytes
.../testWORD_closingSmartQInHyperLink.doc | Bin 0 -> 26624 bytes
.../test-documents/testWORD_custom_props.docx | Bin 0 -> 13942 bytes
.../test-documents/testWORD_docSecurity.docx | Bin 0 -> 12861 bytes
.../test-documents/testWORD_embedded_pdf.doc | Bin 0 -> 1491456 bytes
.../test-documents/testWORD_embedded_pics.docx | Bin 0 -> 52399 bytes
.../test-documents/testWORD_header_hyperlink.doc | Bin 0 -> 22528 bytes
.../resources/test-documents/testWORD_macros.doc | Bin 0 -> 38400 bytes
.../resources/test-documents/testWORD_macros.docm | Bin 0 -> 17322 bytes
.../testWORD_missing_ooxml_bean1.docx | Bin 0 -> 17913 bytes
.../test-documents/testWORD_multi_authors.doc | Bin 0 -> 22528 bytes
.../test-documents/testWORD_numbered_list.doc | Bin 0 -> 44032 bytes
.../testWORD_override_list_numbering.doc | Bin 0 -> 56320 bytes
.../testWORD_override_list_numbering.docx | Bin 0 -> 15746 bytes
.../resources/test-documents/testWORD_phonetic.doc | Bin 0 -> 27136 bytes
.../test-documents/testWORD_phonetic.docx | Bin 0 -> 12523 bytes
.../resources/test-documents/testWORD_signed.docx | Bin 0 -> 18245 bytes
.../testWORD_totalTimeOutOfRange.docx | Bin 0 -> 11047 bytes
.../resources/test-documents/testWORD_various.doc | Bin 0 -> 17408 bytes
.../test-documents/testWORKSSpreadsheet7.0.xlr | Bin 0 -> 10752 bytes
.../test/resources/test-documents/testWordArt.pptx | Bin 0 -> 37792 bytes
.../test-documents/test_embedded_zip.pptx | Bin 0 -> 345027 bytes
.../test-documents/test_list_override.rtf | 21 +
.../test-documents/test_recursive_embedded.docx | Bin 0 -> 27082 bytes
.../testsolidworksDrawing2014SP0.SLDDRW | Bin 0 -> 201216 bytes
.../testsolidworksPart2014SP0.SLDPRT | Bin 0 -> 1043456 bytes
.../tika-parser-miscoffice-module/pom.xml | 3 +-
.../java/org/apache/tika/parser/dbf/DBFCell.java | 147 +
.../org/apache/tika/parser/dbf/DBFFileHeader.java | 144 +
.../apache/tika/parser/epub/EpubContentParser.java | 56 +
.../org/apache/tika/parser/epub/EpubParser.java | 496 +
.../apache/tika/parser/hwp/HwpTextExtractorV5.java | 514 +
.../apache/tika/parser/mif/MIFContentHandler.java | 122 +
.../org/apache/tika/parser/mif/MIFExtractor.java | 179 +
.../parser/odf/NSNormalizerContentHandler.java | 99 +
.../tika/parser/odf/OpenDocumentContentParser.java | 70 +
.../tika/parser/odf/OpenDocumentMacroHandler.java | 60 +
.../tika/parser/wordperfect/QPWTextExtractor.java | 225 +
.../tika/parser/wordperfect/QuattroProParser.java | 72 +
.../tika/parser/wordperfect/WP5Charsets.java | 203 +
.../wordperfect/WPDocumentAreaExtractor.java | 88 +
.../tika/parser/wordperfect/WPInputStream.java | 224 +
.../parser/wordperfect/WPPrefixAreaExtractor.java | 67 +
.../org/apache/tika/parser/dbf/DBFParserTest.java | 150 +
.../apache/tika/parser/hwp/HwpV5ParserTest.java | 83 +
.../tika/parser/wordperfect/QuattroProTest.java | 48 +
.../tika/parser/wordperfect/WPInputStreamTest.java | 127 +
.../org/apache/tika/parser/epub/tika-config.xml | 26 +
.../src/test/resources/test-documents/testDBF.dbf | Bin 0 -> 890 bytes
.../resources/test-documents/testDBF_gb18030.dbf | Bin 0 -> 144 bytes
.../test/resources/test-documents/testFooter.ods | Bin 0 -> 7207 bytes
.../resources/test-documents/testFramemakerMif.mif | 12955 +++++++++++++++++++
.../test/resources/test-documents/testHWP_3.0.hwp | Bin 0 -> 9287 bytes
.../resources/test-documents/testODPMacro.fodp | 781 ++
.../test/resources/test-documents/testODPMacro.odp | Bin 0 -> 14505 bytes
.../test/resources/test-documents/testODP_NPE.odp | Bin 0 -> 431290 bytes
.../resources/test-documents/testODTMacro.fodt | 633 +
.../test/resources/test-documents/testODTMacro.odt | Bin 0 -> 30809 bytes
.../resources/test-documents/testODTStyles2.odt | Bin 0 -> 17383 bytes
.../resources/test-documents/testODTStyles3.odt | Bin 0 -> 17140 bytes
.../resources/test-documents/testOpenOffice2.odf | Bin 0 -> 10977 bytes
.../test/resources/test-documents/testQuattro.wq2 | Bin 0 -> 7938 bytes
.../test/resources/test-documents/testStyles.odt | Bin 0 -> 11663 bytes
.../test-documents/testWordPerfect_42.doc | Bin 0 -> 725 bytes
.../test-documents/testWordPerfect_5_0.wp | Bin 0 -> 9915 bytes
.../test-documents/testWordPerfect_5_1.wp | Bin 0 -> 18267 bytes
.../resources/test-documents/testiBooks.ibooks | Bin 0 -> 970636 bytes
.../tika-parser-news-module/pom.xml | 3 +-
.../apache/tika/parser/iptc/IptcAnpaParser.java | 808 ++
.../services/org.apache.tika.parser.Parser | 17 +
.../test/resources/test-documents/testATOM.atom | 27 +
.../tika-parser-ocr-module/pom.xml | 3 +-
.../apache/tika/parser/ocr/TesseractOCRConfig.java | 689 +
.../services/org.apache.tika.parser.Parser | 15 +
.../tika/parser/ocr/TesseractOCRConfig.properties | 36 +
.../org/apache/tika/config/TIKA-2705-tesseract.xml | 33 +
.../src/test/resources/test-documents/testOCR.docx | Bin 0 -> 62041 bytes
.../src/test/resources/test-documents/testOCR.jpg | Bin 0 -> 3408 bytes
.../src/test/resources/test-documents/testOCR.pptx | Bin 0 -> 78550 bytes
.../tika-parser-pdf-module/pom.xml | 3 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 1001 ++
.../org/apache/tika/parser/pdf/AccessChecker.java | 100 +
.../apache/tika/parser/pdf/AccessCheckerTest.java | 137 +
.../tika/parser/pdf/PDFPreflightParserTest.java | 58 +
.../org/apache/tika/parser/pdf/tika-config.xml | 26 +
.../tika/parser/pdf/tika-preflight-config.xml | 25 +
.../resources/test-documents/testAnnotations.pdf | Bin 0 -> 18580 bytes
.../test-documents/testOptionalHyphen.pdf | Bin 0 -> 44954 bytes
.../test-documents/testPDFFileEmbInAnnotation.pdf | Bin 0 -> 97211 bytes
.../test-documents/testPDFTripleLangTitle.pdf | Bin 0 -> 1719 bytes
.../test-documents/testPDFTwoTextBoxes.pdf | Bin 0 -> 57100 bytes
.../resources/test-documents/testPDFVarious.pdf | Bin 0 -> 205491 bytes
.../testPDF_Version.11.x.PDFA-1b.pdf | Bin 0 -> 23081 bytes
.../test-documents/testPDF_Version.6.x.pdf | Bin 0 -> 5903 bytes
.../test-documents/testPDF_Version.8.x.pdf | Bin 0 -> 5903 bytes
.../test-documents/testPDF_bad_page_303226.pdf | Bin 0 -> 138027 bytes
.../resources/test-documents/testPDF_bookmarks.pdf | Bin 0 -> 9487 bytes
.../test-documents/testPDF_diffTitles.pdf | 261 +
...DF_no_extract_yes_accessibility_owner_empty.pdf | 87 +
.../resources/test-documents/testPDF_protected.pdf | Bin 0 -> 506064 bytes
.../test-documents/testPDF_twoAuthors.pdf | Bin 0 -> 12628 bytes
.../test-documents/testPopupAnnotation.pdf | Bin 0 -> 9081 bytes
.../tika-parser-pkg-module/pom.xml | 3 +-
.../apache/tika/parser/pkg/AbstractPkgTest.java | 90 +
.../apache/tika/parser/pkg/CompressParserTest.java | 72 +
.../apache/tika/parser/pkg/PackageParserTest.java | 81 +
.../org/apache/tika/parser/pkg/RarParserTest.java | 122 +
.../org/apache/tika/parser/pkg/TarParserTest.java | 77 +
.../src/test/resources/test-documents/TIKA-216.tgz | Bin 0 -> 1270 bytes
.../resources/test-documents/full_encrypted.7z | Bin 0 -> 198 bytes
.../src/test/resources/test-documents/moby.zip | Bin 0 -> 606033 bytes
.../src/test/resources/test-documents/quine.gz | Bin 0 -> 204 bytes
.../test-documents/test-documents-enc.rar | Bin 0 -> 68636 bytes
.../test-documents/test-documents-spanned.z01 | Bin 0 -> 65536 bytes
.../test-documents/test-documents-spanned.zip | Bin 0 -> 3488 bytes
.../resources/test-documents/test-documents.cpio | Bin 0 -> 116224 bytes
.../resources/test-documents/test-zip-of-zip.zip | Bin 0 -> 299 bytes
.../test/resources/test-documents/testARofText.ar | 5 +
.../resources/test-documents/testJAR_with_HTML.jar | Bin 0 -> 5594 bytes
.../src/test/resources/test-documents/testLZMA_oom | Bin 0 -> 19 bytes
.../src/test/resources/test-documents/testSVG.svgz | Bin 0 -> 222 bytes
.../resources/test-documents/testSnappy-framed.sz | Bin 0 -> 58586 bytes
.../test/resources/test-documents/testZSTD.zstd | Bin 0 -> 143 bytes
.../src/test/resources/test-documents/testZ_oom.Z | 1 +
.../testZip_with_DataDescriptor2.zip | Bin 0 -> 1987 bytes
.../tika-parser-text-module/pom.xml | 3 +-
.../apache/tika/parser/strings/StringsConfig.java | 187 +
.../tika/parser/strings/StringsEncoding.java | 45 +
.../org/apache/tika/parser/txt/CharsetMatch.java | 267 +
.../apache/tika/parser/txt/CharsetRecog_sbcs.java | 1356 ++
.../tika/parser/txt/UniversalEncodingListener.java | 113 +
.../services/org.apache.tika.parser.Parser | 22 +
.../parser/strings/Latin1StringsParserTest.java | 69 +
.../tika/parser/strings/StringsConfigTest.java | 61 +
.../resources/test-documents/english.cp500.txt | 1 +
.../resources/test-documents/multi-language.txt | 38 +-
.../src/test/resources/test-documents/resume.html | 73 +
.../resources/test-documents/russian.cp866.txt | 12 +-
.../resources/test-documents/testTXT_win-1252.txt | 1 +
.../resources/test-documents/testVCalendar.vcs | 10 +
.../test-properties/StringsConfig-full.properties | 18 +
.../tika-parser-xml-module/pom.xml | 3 +-
.../tika/parser/xliff/XLIFF12ContentHandler.java | 133 +
.../org/apache/tika/parser/xliff/XLZParser.java | 146 +
.../org/apache/tika/parser/xml/DcXMLParser.java | 60 +
.../tika/parser/xml/ElementMetadataHandler.java | 241 +
.../apache/tika/parser/xml/FictionBookParser.java | 114 +
.../apache/tika/parser/xml/MetadataHandler.java | 85 +
.../java/org/apache/tika/parser/xml/XMLParser.java | 92 +
.../tika/parser/xliff/XLIFF12ParserTest.java | 54 +
.../tika/parser/xml/FictionBookParserTest.java | 42 +
.../parser/xml/TextAndAttributeXMLParserTest.java | 53 +
.../test/resources/test-documents/testXLIFF12.xlz | Bin 0 -> 1004 bytes
.../src/test/resources/test-documents/testXXE.xml | 4 +
.../tika-parser-xmp-commons/pom.xml | 3 +-
.../test-documents/testJPEG_commented.jpg | Bin 0 -> 13325 bytes
.../testJPEG_commented_pspcs2mac.jpg | Bin 0 -> 26173 bytes
.../pom.xml | 14 +-
.../tika/detect/zip/CompressorConstants.java | 77 +
.../detect/zip/DeprecatedZipContainerDetector.java | 39 +
.../org/apache/tika/detect/zip/IPADetector.java | 119 +
.../org/apache/tika/detect/zip/KMZDetector.java | 98 +
.../apache/tika/detect/zip/StarOfficeDetector.java | 144 +
.../tika/detect/zip/StreamingDetectContext.java | 78 +
.../tika/detect/zip/ZipContainerDetector.java | 66 +
.../services/org.apache.tika.detect.Detector | 16 +
.../org/apache/tika/detect/zip/ZipParserTest.java | 45 +
565 files changed, 71447 insertions(+), 206 deletions(-)
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/pom.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/pom.xml
index 4a8c390..1b836af 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/pom.xml
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/pom.xml
@@ -21,7 +21,7 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
- <artifactId>tika-parser-modules</artifactId>
+ <artifactId>tika-parsers-classic-modules</artifactId>
<groupId>org.apache.tika</groupId>
<version>2.0.0-SNAPSHOT</version>
</parent>
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
new file mode 100644
index 0000000..a0b8a3f
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.apple;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.EndianUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Parser that strips the header off of AppleSingle and AppleDouble
+ * files.
+ * <p>
+ * See <a href="http://kaiser-edv.de/documents/AppleSingle_AppleDouble.pdf">spec document</a>.
+ */
+public class AppleSingleFileParser extends AbstractParser {
+
+ // Upper bound (2^30 bytes = 1 GiB) on the length of a REAL_NAME entry
+ // that parsing will buffer in memory; larger values are rejected.
+ private static final int MAX_FIELD_LENGTH = 1_073_741_824;
+ /**
+ * Entry types: the numeric entry ids that appear in the entry table.
+ * Only DATA_FORK and REAL_NAME are referenced by name in this class;
+ * the remaining ids are listed for reference.
+ */
+ private static final int DATA_FORK = 1;
+ private static final int RESOURCE_FORK = 2;
+ private static final int REAL_NAME = 3;
+ private static final int COMMENT = 4;
+ private static final int ICON_BW = 5;
+ private static final int ICON_COLOR = 6;
+ //7?!
+ private static final int FILE_DATES_INFO = 8;
+ private static final int FINDER_INFO = 9;
+ private static final int MACINTOSH_FILE_INFO = 10;
+ private static final int PRODOS_FILE_INFO = 11;
+ private static final int MSDOS_FILE_INFO = 12;
+ private static final int SHORT_NAME = 13;
+ private static final int AFP_FILE_INFO = 14;
+ private static final int DIRECTORY_ID = 15;
+
+ // The only media type this parser declares support for.
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("applefile"));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context the parse context; not consulted
+     * @return the constant {@code SUPPORTED_TYPES} set
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the file header and entry table, then passes the data fork
+     * (entry id 1), if one is present, to the embedded document extractor.
+     * The name stored in a REAL_NAME entry, when present, is attached to the
+     * embedded document's metadata.
+     *
+     * @param stream   stream positioned at the start of the file
+     * @param handler  handler that receives the XHTML output
+     * @param metadata metadata of the container document
+     * @param context  parse context used to look up the embedded document extractor
+     * @throws IOException   if the stream cannot be read or skipped as far as required
+     * @throws SAXException  if the handler fails
+     * @throws TikaException if the header is malformed, including a data fork whose
+     *                       offset lies before the bytes that were already consumed
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        EmbeddedDocumentExtractor ex = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
+
+        short numEntries = readThroughNumEntries(stream);
+        //fixed part of the header: two 32-bit ints, 16 filler bytes, 16-bit entry count
+        long bytesRead = 26;
+        List<FieldInfo> fieldInfoList = getSortedFieldInfoList(stream, numEntries);
+        //every entry descriptor consists of three 32-bit ints
+        bytesRead += 12 * numEntries;
+        Metadata embeddedMetadata = new Metadata();
+        bytesRead = processFieldEntries(stream, fieldInfoList, embeddedMetadata, bytesRead);
+        FieldInfo contentFieldInfo = getContentFieldInfo(fieldInfoList);
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        if (contentFieldInfo != null) {
+            long diff = contentFieldInfo.offset - bytesRead;
+            //a negative skip would surface as an unchecked IllegalArgumentException
+            //from IOUtils.skipFully; report the malformed file explicitly instead
+            if (diff < 0) {
+                throw new TikaException("Data fork offset (" + contentFieldInfo.offset
+                        + ") lies before the current stream position (" + bytesRead + ")");
+            }
+            IOUtils.skipFully(stream, diff);
+            if (ex.shouldParseEmbedded(embeddedMetadata)) {
+                // TODO: we should probably add a readlimiting wrapper around this
+                // stream to ensure that not more than contentFieldInfo.length bytes
+                // are read
+                ex.parseEmbedded(new CloseShieldInputStream(stream),
+                        xhtml, embeddedMetadata, false);
+            }
+        }
+        xhtml.endDocument();
+
+    }
+
+    /**
+     * Finds the entry that describes the data fork.
+     *
+     * @param fieldInfoList the entries read from the entry table
+     * @return the first entry whose id is {@code DATA_FORK}, or {@code null} if there is none
+     */
+    private FieldInfo getContentFieldInfo(List<FieldInfo> fieldInfoList) {
+        for (FieldInfo fieldInfo : fieldInfoList) {
+            //use the named constant rather than the bare literal 1
+            if (fieldInfo.entryId == DATA_FORK) {
+                return fieldInfo;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Walks the entries in offset order, consuming every entry except the data fork.
+     * A REAL_NAME entry is decoded as US-ASCII and stored as the original resource
+     * name; all other non-data-fork entries are skipped. For the data fork the stream
+     * is only advanced to the entry's offset; its content is left unread.
+     *
+     * @param stream           the stream, positioned {@code bytesRead} bytes into the file
+     * @param fieldInfoList    entries sorted by offset
+     * @param embeddedMetadata receives the original resource name, if present
+     * @param bytesRead        number of bytes consumed from the stream so far
+     * @return the updated number of bytes consumed from the stream
+     * @throws IOException   if reading or skipping fails
+     * @throws TikaException if an entry starts before the current position, or a
+     *                       REAL_NAME entry is longer than {@code MAX_FIELD_LENGTH}
+     */
+    private long processFieldEntries(InputStream stream, List<FieldInfo> fieldInfoList,
+                                     Metadata embeddedMetadata, long bytesRead)
+            throws IOException, TikaException {
+        for (FieldInfo f : fieldInfoList) {
+            long diff = f.offset - bytesRead;
+            //overlapping or out-of-range offsets would otherwise reach
+            //IOUtils.skipFully as a negative count and fail with an
+            //unchecked IllegalArgumentException
+            if (diff < 0) {
+                throw new TikaException("Entry " + f.entryId + " at offset " + f.offset
+                        + " starts before the current stream position (" + bytesRead + ")");
+            }
+            //just in case
+            IOUtils.skipFully(stream, diff);
+            bytesRead += diff;
+            if (f.entryId == REAL_NAME) {
+                if (f.length > MAX_FIELD_LENGTH) {
+                    throw new TikaMemoryLimitException(f.length, MAX_FIELD_LENGTH);
+                }
+                //the cast is safe: length was just checked against an int bound
+                byte[] buffer = new byte[(int) f.length];
+                IOUtils.readFully(stream, buffer);
+                bytesRead += f.length;
+                String originalFileName = new String(buffer, 0, buffer.length, StandardCharsets.US_ASCII);
+                embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, originalFileName);
+            } else if (f.entryId != DATA_FORK) {
+                IOUtils.skipFully(stream, f.length);
+                bytesRead += f.length;
+            }
+        }
+        return bytesRead;
+    }
+
+
+    /**
+     * Reads {@code numEntries} entry descriptors (id, offset, length; each an
+     * unsigned 32-bit big-endian int) and returns them sorted by offset.
+     *
+     * @param stream     stream positioned at the first entry descriptor
+     * @param numEntries number of descriptors announced by the header
+     * @return the descriptors, ordered by ascending offset
+     * @throws IOException   if the descriptors cannot be read
+     * @throws TikaException if the header announces no entries, or a negative count
+     */
+    private List<FieldInfo> getSortedFieldInfoList(InputStream stream, short numEntries)
+            throws IOException, TikaException {
+        //the count is read as a signed short; a negative value would make
+        //the ArrayList constructor throw an unchecked IllegalArgumentException
+        if (numEntries <= 0) {
+            throw new TikaException("AppleSingleFile missing field info");
+        }
+        //this is probably overkill. I'd hope that these were already
+        //in order. This ensures it.
+        List<FieldInfo> fieldInfoList = new ArrayList<>(numEntries);
+        for (int i = 0; i < numEntries; i++) {
+            //convert 32-bit unsigned ints to longs
+            long entryId = EndianUtils.readUIntBE(stream);
+            long offset = EndianUtils.readUIntBE(stream);
+            long length = EndianUtils.readUIntBE(stream);
+            fieldInfoList.add(new FieldInfo(entryId, offset, length));
+        }
+        //make absolutely sure these are in order!
+        fieldInfoList.sort(Comparator.comparingLong(fieldInfo -> fieldInfo.offset));
+        return fieldInfoList;
+    }
+
+    /**
+     * Consumes the fixed part of the header and returns the entry count that
+     * follows it.
+     *
+     * @param stream stream positioned at the start of the file
+     * @return the 16-bit big-endian entry count, as read
+     * @throws TikaException if the version field is not {@code 0x00020000}
+     * @throws IOException   if the header cannot be read
+     */
+    private short readThroughNumEntries(InputStream stream) throws TikaException, IOException {
+        final int expectedVersion = 0x00020000;
+        //leading 32-bit field; read and discarded without inspection
+        EndianUtils.readIntBE(stream);
+        long actualVersion = EndianUtils.readIntBE(stream);
+        if (actualVersion == expectedVersion) {
+            //16 bytes of filler precede the entry count
+            IOUtils.skipFully(stream, 16);
+            short entryCount = EndianUtils.readShortBE(stream);
+            return entryCount;
+        }
+        throw new TikaException("Version should have been 0x00020000, but was:" + actualVersion);
+    }
+
+    /**
+     * Immutable triple describing one entry of the entry table. The values are
+     * unsigned 32-bit ints from the file, widened to {@code long}.
+     * <p>
+     * Declared static because it never touches the enclosing parser instance;
+     * a non-static inner class would carry a hidden reference to it.
+     */
+    private static final class FieldInfo {
+
+        private final long entryId;
+        private final long offset;
+        private final long length;
+
+        private FieldInfo(long entryId, long offset, long length) {
+            this.entryId = entryId;
+            this.offset = offset;
+            this.length = length;
+        }
+    }
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/BPListDetector.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/BPListDetector.java
new file mode 100644
index 0000000..731e88e
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/BPListDetector.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.apple;
+
+import com.dd.plist.NSDictionary;
+import com.dd.plist.NSObject;
+import com.dd.plist.PropertyListFormatException;
+import com.dd.plist.PropertyListParser;
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.ParseException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Detector for BPList with utility functions for PList.
+ *
+ * Without significant refactoring, this can't easily work as a true
+ * detector on plist subtypes. Rather, for now, we require the file to be
+ * parsed and then the parser adds the subtype for xml-based plists.
+ * @since 1.25
+ */
+public class BPListDetector implements Detector {
+
+    //xml versions
+    static MediaType MEMGRAPH = MediaType.application("x-plist-memgraph");
+    static MediaType WEBARCHIVE = MediaType.application("x-plist-webarchive");
+    static MediaType PLIST = MediaType.application("x-plist");
+    static MediaType ITUNES = MediaType.application("x-plist-itunes");
+
+
+    //binary versions
+    static MediaType BMEMGRAPH = MediaType.application("x-bplist-memgraph");
+    static MediaType BWEBARCHIVE = MediaType.application("x-bplist-webarchive");
+    static MediaType BPLIST = MediaType.application("x-bplist");
+    static MediaType BITUNES = MediaType.application("x-bplist-itunes");
+
+    //maps each binary plist type to its xml counterpart; the reference is
+    //final because the map is populated once below and never reassigned
+    private static final Map<MediaType, MediaType> BINARY_TO_XML = new HashMap<>();
+
+    static {
+        BINARY_TO_XML.put(BMEMGRAPH, MEMGRAPH);
+        BINARY_TO_XML.put(BWEBARCHIVE, WEBARCHIVE);
+        BINARY_TO_XML.put(BPLIST, PLIST);
+        BINARY_TO_XML.put(BITUNES, ITUNES);
+    }
+
+    /**
+     * Detects binary property lists by their leading {@code bplist} bytes and,
+     * on a match, parses the whole property list to pick a more specific subtype
+     * from its top-level keys.
+     * <p>
+     * The parsed root object is stored as the open container when the input is a
+     * {@link TikaInputStream}.
+     * NOTE(review): when the input has no backing file, the stream itself is handed
+     * to the property list parser after the header check and is not reset again
+     * here; confirm that callers expect this.
+     *
+     * @param input    input stream must support reset; may be {@code null}
+     * @param metadata input metadata for the document; not consulted
+     * @return {@link MediaType#OCTET_STREAM} if the input is {@code null}, fewer than
+     *         six bytes could be read, reading the header failed, or the header does
+     *         not start with {@code bplist}; otherwise the subtype chosen by
+     *         {@code detectOnKeys} for a dictionary root, or {@code BPLIST} for any
+     *         other root object
+     * @throws IOException if the stream cannot be reset or the property list cannot
+     *                     be parsed
+     */
+    @Override
+    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+        if (input == null) {
+            return MediaType.OCTET_STREAM;
+        }
+        byte[] header = new byte[8];
+        input.mark(header.length);
+
+        try {
+            if (IOUtils.read(input, header) < 6) {
+                return MediaType.OCTET_STREAM;
+            }
+        } catch (IOException e) {
+            return MediaType.OCTET_STREAM;
+        } finally {
+            //always rewind to the mark, whatever the outcome of the read
+            input.reset();
+        }
+
+        final String magic = "bplist";
+        for (int pos = 0; pos < magic.length(); pos++) {
+            if (header[pos] != magic.charAt(pos)) {
+                return MediaType.OCTET_STREAM;
+            }
+        }
+        //TODO: extract the version with the next two bytes if they were read
+        final boolean isTikaStream = input instanceof TikaInputStream;
+        NSObject root;
+        try {
+            if (isTikaStream && ((TikaInputStream) input).hasFile()) {
+                root = PropertyListParser.parse(((TikaInputStream) input).getFile());
+            } else {
+                root = PropertyListParser.parse(input);
+            }
+            if (isTikaStream) {
+                ((TikaInputStream) input).setOpenContainer(root);
+            }
+        } catch (PropertyListFormatException | ParseException | ParserConfigurationException | SAXException e) {
+            throw new IOExceptionWithCause("problem parsing root", e);
+        }
+        if (!(root instanceof NSDictionary)) {
+            return BPLIST;
+        }
+        return detectOnKeys(((NSDictionary) root).getHashMap().keySet());
+    }
+
+    /**
+     * Chooses a binary plist subtype from the top-level dictionary keys.
+     * The checks run in a fixed order and the first match wins.
+     *
+     * @param keySet the top-level keys of the property list
+     * @return {@code BMEMGRAPH}, {@code BWEBARCHIVE} or {@code BITUNES} when the
+     *         corresponding keys are present, otherwise {@code BPLIST}
+     */
+    static MediaType detectOnKeys(Set<String> keySet) {
+        if (keySet.contains("nodes") && keySet.contains("edges")
+                && keySet.contains("graphEncodingVersion")) {
+            return BMEMGRAPH;
+        }
+        //&& keySet.contains("WebSubresources") should we require this?
+        if (keySet.contains("WebMainResource")) {
+            return BWEBARCHIVE;
+        }
+        if (keySet.contains("Playlists") && keySet.contains("Tracks")
+                && keySet.contains("Music Folder")) {
+            return BITUNES;
+        }
+        //if it contains $archiver and $objects, it is a bplist inside a webarchive
+        return BPLIST;
+    }
+
+    /**
+     * Same key-based detection as {@code detectOnKeys}, mapped to the XML
+     * flavour of the detected type.
+     *
+     * @param keySet the top-level keys of the property list
+     * @return the XML media type registered for the detected binary type
+     */
+    static MediaType detectXMLOnKeys(Set<String> keySet) {
+        MediaType binaryType = detectOnKeys(keySet);
+        return BINARY_TO_XML.get(binaryType);
+    }
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
new file mode 100644
index 0000000..2ffbf56
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.iwork;
+
+import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.detect.XmlRootExtractor;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.OfflineContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.utils.XMLReaderUtils;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import javax.xml.namespace.QName;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A parser for the IWork container files. This includes *.key, *.pages and *.numbers files.
+ * This parser delegates the relevant entries to a {@link ContentHandler} that parses the content.
+ *
+ * Currently supported formats:
+ * <ol>
+ * <li>Keynote format version 2.x. Currently only tested with Keynote version 5.x
+ * <li>Pages format version 1.x. Currently only tested with Pages version 4.0.x
+ * <li>Numbers format version 1.x. Currently only tested with Numbers version 2.0.x
+ * </ol>
+ */
+public class IWorkPackageParser extends AbstractParser {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = -2160322853809682372L;
+
+ /**
+ * Which files within an iWork file contain the actual content?
+ */
+ public final static Set<String> IWORK_CONTENT_ENTRIES = Collections.unmodifiableSet(
+ new HashSet<String>(Arrays.asList("index.apxl", "index.xml", "presentation.apxl"))
+ );
+ /**
+ * All iWork files contain one of these, so we can detect based on it
+ */
+ public final static String IWORK_COMMON_ENTRY = "buildVersionHistory.plist";
+
+    /**
+     * The document flavours this parser distinguishes. Each XML-based flavour is
+     * identified by the namespace and local name of its root element;
+     * {@link #ENCRYPTED} has neither and is only ever reported when the entry
+     * cannot be read because of an unsupported zip feature.
+     */
+    public enum IWORKDocumentType {
+        KEYNOTE("http://developer.apple.com/namespaces/keynote2", "presentation", MediaType.application("vnd.apple.keynote")),
+        NUMBERS("http://developer.apple.com/namespaces/ls", "document", MediaType.application("vnd.apple.numbers")),
+        PAGES("http://developer.apple.com/namespaces/sl", "document", MediaType.application("vnd.apple.pages")),
+        ENCRYPTED(null, null, MediaType.application("x-tika-iworks-protected"));
+
+        private final String namespace;
+        private final String part;
+        private final MediaType type;
+
+        IWORKDocumentType(String namespace, String part, MediaType type) {
+            this.namespace = namespace;
+            this.part = part;
+            this.type = type;
+        }
+
+        /**
+         * @return the root element namespace, or {@code null} for {@link #ENCRYPTED}
+         */
+        public String getNamespace() {
+            return namespace;
+        }
+
+        /**
+         * @return the root element local name, or {@code null} for {@link #ENCRYPTED}
+         */
+        public String getPart() {
+            return part;
+        }
+
+        /**
+         * @return the media type reported for this flavour
+         */
+        public MediaType getType() {
+            return type;
+        }
+
+        /**
+         * Detects the flavour of an entry of a random-access zip file.
+         *
+         * @param entry the entry to inspect; may be {@code null}
+         * @param zip   the zip file that contains the entry
+         * @return the detected type, or {@code null} if the entry is {@code null},
+         *         cannot be opened, or is not recognised
+         */
+        public static IWORKDocumentType detectType(ZipArchiveEntry entry, ZipFile zip) {
+            try {
+                if (entry == null) {
+                    return null;
+                }
+
+                try (InputStream stream = zip.getInputStream(entry)) {
+                    return detectType(stream);
+                }
+            } catch (IOException e) {
+                return null;
+            }
+        }
+
+        /**
+         * Detects the flavour of the current entry of a streaming zip.
+         *
+         * @param entry the current entry; may be {@code null}
+         * @param zip   the zip stream, which is read directly
+         * @return the detected type, or {@code null} if the entry is {@code null}
+         *         or is not recognised
+         */
+        public static IWORKDocumentType detectType(ZipArchiveEntry entry, ZipArchiveInputStream zip) {
+            if (entry == null) {
+                return null;
+            }
+
+            return detectType(zip);
+        }
+
+        /**
+         * Detects the flavour from the XML root element found in the stream.
+         *
+         * @param stream the stream to inspect; it is read from
+         * @return the matching type; {@link #ENCRYPTED} if no root element could be
+         *         extracted and reading the stream reports an unsupported zip
+         *         feature; otherwise {@code null}
+         */
+        public static IWORKDocumentType detectType(InputStream stream) {
+            QName qname = new XmlRootExtractor().extractRootElement(stream);
+            if (qname != null) {
+                String uri = qname.getNamespaceURI();
+                String local = qname.getLocalPart();
+
+                for (IWORKDocumentType type : values()) {
+                    // ENCRYPTED carries a null namespace and part; dereferencing
+                    // them threw a NullPointerException for every root element
+                    // that matched none of the other types
+                    if (type.namespace == null || type.part == null) {
+                        continue;
+                    }
+                    if (type.namespace.equals(uri) && type.part.equals(local)) {
+                        return type;
+                    }
+                }
+            } else {
+                // There was a problem with extracting the root type
+                // Password Protected iWorks files are funny, but we can usually
+                // spot them because they encrypt part of the zip stream
+                try {
+                    stream.read();
+                } catch (UnsupportedZipFeatureException e) {
+                    // Compression field was likely encrypted
+                    return ENCRYPTED;
+                } catch (Exception ignored) {
+                    // best effort only: any other failure means "not recognised"
+                }
+            }
+            return null;
+        }
+    }
+
+ /**
+ * This parser handles all iWorks formats.
+ */
+ private final static Set<MediaType> supportedTypes =
+ Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+ MediaType.application("vnd.apple.iwork"),
+ IWORKDocumentType.KEYNOTE.getType(),
+ IWORKDocumentType.NUMBERS.getType(),
+ IWORKDocumentType.PAGES.getType()
+ )));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context the parse context; not consulted
+     * @return the unmodifiable {@code supportedTypes} set
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return supportedTypes;
+    }
+
+    /**
+     * Streams through the zip container and, for every entry whose name is listed
+     * in {@code IWORK_CONTENT_ENTRIES}, detects the document flavour and parses the
+     * entry with the matching content handler. Entries of an unrecognised flavour
+     * are skipped. For an encrypted entry only the content type is recorded and an
+     * empty XHTML document is emitted.
+     *
+     * @param stream   the zip container; it is not closed by this method
+     * @param handler  handler that receives the XHTML output
+     * @param metadata receives the detected content type and handler-specific values
+     * @param context  parse context passed on to the SAX parsing utility
+     * @throws IOException   if the zip stream cannot be read or rewound
+     * @throws SAXException  if a handler fails
+     * @throws TikaException if parsing fails
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        ZipArchiveInputStream zip = new ZipArchiveInputStream(stream);
+
+        for (ZipArchiveEntry entry = zip.getNextZipEntry(); entry != null;
+             entry = zip.getNextZipEntry()) {
+            if (!IWORK_CONTENT_ENTRIES.contains(entry.getName())) {
+                continue;
+            }
+
+            // Buffer the entry so that detection can read ahead and then rewind
+            InputStream entryStream = new BufferedInputStream(zip, 4096);
+            entryStream.mark(4096);
+            IWORKDocumentType type = IWORKDocumentType.detectType(entryStream);
+            entryStream.reset();
+            if (type == null) {
+                continue;
+            }
+
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+            ContentHandler contentHandler;
+
+            switch (type) {
+                case KEYNOTE:
+                    contentHandler = new KeynoteContentHandler(xhtml, metadata);
+                    break;
+                case NUMBERS:
+                    contentHandler = new NumbersContentHandler(xhtml, metadata);
+                    break;
+                case PAGES:
+                    contentHandler = new PagesContentHandler(xhtml, metadata);
+                    break;
+                case ENCRYPTED:
+                    // We can't do anything for the file right now
+                    contentHandler = null;
+                    break;
+                default:
+                    throw new TikaException("Unhandled iWorks file " + type);
+            }
+
+            metadata.add(Metadata.CONTENT_TYPE, type.getType().toString());
+            xhtml.startDocument();
+            if (contentHandler != null) {
+                XMLReaderUtils.parseSAX(
+                        new CloseShieldInputStream(entryStream),
+                        new OfflineContentHandler(contentHandler),
+                        context
+                );
+            }
+            xhtml.endDocument();
+        }
+        // Don't close the zip InputStream (TIKA-1117).
+    }
+
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
new file mode 100644
index 0000000..40b3d60
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.iwork;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+class KeynoteContentHandler extends DefaultHandler {
+
+ public final static String PRESENTATION_WIDTH = "slides-width";
+ public final static String PRESENTATION_HEIGHT = "slides-height";
+
+ private final XHTMLContentHandler xhtml;
+ private final Metadata metadata;
+
+ private boolean inSlide = false;
+ private boolean inTheme = false;
+ private boolean inTitle = false;
+ private boolean inBody = false;
+ private String tableId;
+ private Integer numberOfColumns = null;
+ private Integer currentColumn = null;
+
+ private boolean inMetadata = false;
+ private boolean inMetaDataTitle = false;
+ private boolean inMetaDataAuthors = false;
+
+ private boolean inParsableText = false;
+
+ private int numberOfSlides = 0;
+
+    /**
+     * Creates a handler that writes extracted content to {@code xhtml} and
+     * extracted properties to {@code metadata}.
+     *
+     * @param xhtml    sink for the generated XHTML events
+     * @param metadata sink for the extracted metadata values
+     */
+    KeynoteContentHandler(XHTMLContentHandler xhtml, Metadata metadata) {
+        this.xhtml = xhtml;
+        this.metadata = metadata;
+    }
+
+    /**
+     * Records the number of {@code key:slide} elements seen as the slide count.
+     */
+    @Override
+    public void endDocument() throws SAXException {
+        String slideCount = Integer.toString(numberOfSlides);
+        metadata.set(Office.SLIDE_COUNT, slideCount);
+    }
+
+ /**
+ * Updates the parsing state for an opening element and opens the matching
+ * XHTML element where one applies. Elements are matched on their qualified
+ * name; the first matching branch wins and unmatched elements are ignored.
+ *
+ * @param uri namespace URI; not consulted
+ * @param localName local name; not consulted
+ * @param qName qualified name used for dispatching
+ * @param attributes attributes of the element
+ * @throws SAXException if writing to the XHTML handler fails
+ */
+ @Override
+ public void startElement(
+ String uri, String localName, String qName, Attributes attributes)
+ throws SAXException {
+ if ("key:theme".equals(qName)) {
+ inTheme = true;
+ } else if ("key:slide".equals(qName)) {
+ inSlide = true;
+ numberOfSlides++;
+ xhtml.startElement("div");
+ } else if ("key:master-slide".equals(qName)) {
+ // master slides are rendered like slides but are not counted
+ inSlide = true;
+ xhtml.startElement("div");
+ } else if ("key:title-placeholder".equals(qName) && inSlide) {
+ inTitle = true;
+ xhtml.startElement("h1");
+ } else if ("sf:sticky-note".equals(qName) && inSlide) {
+ xhtml.startElement("p");
+ } else if ("key:notes".equals(qName) && inSlide) {
+ xhtml.startElement("p");
+ } else if ("key:body-placeholder".equals(qName) && inSlide) {
+ xhtml.startElement("p");
+ inBody = true;
+ } else if ("key:size".equals(qName) && !inTheme) {
+ // size elements inside a theme are ignored
+ String width = attributes.getValue("sfa:w");
+ String height = attributes.getValue("sfa:h");
+ metadata.set(PRESENTATION_WIDTH, width);
+ metadata.set(PRESENTATION_HEIGHT, height);
+ } else if ("sf:text-body".equals(qName)) {
+ inParsableText = true;
+ } else if ("key:metadata".equals(qName)) {
+ inMetadata = true;
+ } else if (inMetadata && "key:title".equals(qName)) {
+ inMetaDataTitle = true;
+ } else if (inMetadata && "key:authors".equals(qName)) {
+ inMetaDataAuthors = true;
+ } else if (inMetaDataTitle && "key:string".equals(qName)) {
+ metadata.set(TikaCoreProperties.TITLE, attributes.getValue("sfa:string"));
+ } else if (inMetaDataAuthors && "key:string".equals(qName)) {
+ // add rather than set: each author string becomes its own value
+ metadata.add(TikaCoreProperties.CREATOR, attributes.getValue("sfa:string"));
+ } else if (inSlide && "sf:tabular-model".equals(qName)) {
+ // a non-null tableId marks "inside a table" for the branches below
+ tableId = attributes.getValue("sfa:ID");
+ xhtml.startElement("table");
+ } else if (tableId != null && "sf:columns".equals(qName)) {
+ // Integer.parseInt throws NumberFormatException if sf:count is absent or not a number
+ numberOfColumns = Integer.parseInt(attributes.getValue("sf:count"));
+ currentColumn = 0;
+ } else if (tableId != null && "sf:ct".equals(qName)) {
+ parseTableData(attributes.getValue("sfa:s"));
+ } else if (tableId != null && "sf:n".equals(qName)) {
+ parseTableData(attributes.getValue("sf:v"));
+ } else if ("sf:p".equals(qName)) {
+ xhtml.startElement("p");
+ }
+ }
+
+ /**
+ * Reverts the state set by the matching opening element and closes the
+ * XHTML element that was opened for it, if any. Elements are matched on
+ * their qualified name; the first matching branch wins.
+ *
+ * @param uri namespace URI; not consulted
+ * @param localName local name; not consulted
+ * @param qName qualified name used for dispatching
+ * @throws SAXException if writing to the XHTML handler fails
+ */
+ @Override
+ public void endElement(String uri, String localName, String qName)
+ throws SAXException {
+ if ("key:theme".equals(qName)) {
+ inTheme = false;
+ } else if ("key:slide".equals(qName)) {
+ inSlide = false;
+ xhtml.endElement("div");
+ } else if ("key:master-slide".equals(qName)) {
+ inSlide = false;
+ xhtml.endElement("div");
+ } else if ("key:title-placeholder".equals(qName) && inSlide) {
+ inTitle = false;
+ xhtml.endElement("h1");
+ } else if ("sf:sticky-note".equals(qName) && inSlide) {
+ xhtml.endElement("p");
+ } else if ("key:notes".equals(qName) && inSlide) {
+ xhtml.endElement("p");
+ } else if ("key:body-placeholder".equals(qName) && inSlide) {
+ xhtml.endElement("p");
+ inBody = false;
+ } else if ("sf:text-body".equals(qName)) {
+ inParsableText = false;
+ } else if ("key:metadata".equals(qName)) {
+ inMetadata = false;
+ } else if (inMetadata && "key:title".equals(qName)) {
+ inMetaDataTitle = false;
+ } else if (inMetadata && "key:authors".equals(qName)) {
+ inMetaDataAuthors = false;
+ } else if (inSlide && "sf:tabular-model".equals(qName)) {
+ // leaving the table: close it and forget all per-table state
+ xhtml.endElement("table");
+ tableId = null;
+ numberOfColumns = null;
+ currentColumn = null;
+ } else if ("sf:p".equals(qName)) {
+ xhtml.endElement("p");
+ }
+ }
+
+    /**
+     * Forwards character data to the XHTML output, but only for non-empty runs
+     * that occur inside a text body within a slide.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        boolean shouldEmit = inParsableText && inSlide && length != 0;
+        if (!shouldEmit) {
+            return;
+        }
+        xhtml.characters(ch, start, length);
+    }
+
+    /**
+     * Writes one table cell, opening a row before the first column and closing
+     * it after the last one.
+     * <p>
+     * If no column information has been seen for the current table, the column
+     * counter is still {@code null}; comparing it with {@code 0} would unbox it
+     * and throw a NullPointerException. In that case the value is written as a
+     * row of its own so that the output stays well-formed.
+     *
+     * @param value the cell value taken from the element's attribute
+     * @throws SAXException if writing to the XHTML handler fails
+     */
+    private void parseTableData(String value) throws SAXException {
+        if (currentColumn == null) {
+            xhtml.startElement("tr");
+            xhtml.element("td", value);
+            xhtml.endElement("tr");
+            return;
+        }
+        if (currentColumn == 0) {
+            xhtml.startElement("tr");
+        }
+        xhtml.element("td", value);
+
+        currentColumn++;
+        if (currentColumn.equals(numberOfColumns)) {
+            xhtml.endElement("tr");
+            currentColumn = 0;
+        }
+    }
+
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java
new file mode 100644
index 0000000..2ee64be
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.iwork;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.util.HashMap;
+import java.util.Map;
+
+class NumbersContentHandler extends DefaultHandler {
+
+ private final XHTMLContentHandler xhtml;
+ private final Metadata metadata;
+
+ private boolean inSheet = false;
+
+ private boolean inText = false;
+ private boolean parseText = false;
+
+ private boolean inMetadata = false;
+ private Property metadataKey;
+ private String metadataPropertyQName;
+
+ private boolean inTable = false;
+ private int numberOfSheets = 0;
+ private int numberOfColumns = -1;
+ private int currentColumn = 0;
+
+ private Map<String, String> menuItems = new HashMap<String, String>();
+ private String currentMenuItemId;
+
+ NumbersContentHandler(XHTMLContentHandler xhtml, Metadata metadata) {
+ this.xhtml = xhtml;
+ this.metadata = metadata;
+ }
+
+ @Override
+ public void endDocument() throws SAXException {
+ metadata.set(Office.PAGE_COUNT, String.valueOf(numberOfSheets));
+ }
+
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
+ if ("ls:workspace".equals(qName)) {
+ inSheet = true;
+ numberOfSheets++;
+ xhtml.startElement("div");
+ String sheetName = attributes.getValue("ls:workspace-name");
+ metadata.add("sheetNames", sheetName);
+ }
+
+ if ("sf:text".equals(qName)) {
+ inText = true;
+ xhtml.startElement("p");
+ }
+
+ if ("sf:p".equals(qName)) {
+ parseText = true;
+ }
+
+ if ("sf:metadata".equals(qName)) {
+ inMetadata = true;
+ return;
+ }
+
+ if (inMetadata && metadataKey == null) {
+ metadataKey = resolveMetadataKey(localName);
+ metadataPropertyQName = qName;
+ }
+
+ if (inMetadata && metadataKey != null && "sf:string".equals(qName)) {
+ metadata.add(metadataKey, attributes.getValue("sfa:string"));
+ }
+
+ if (!inSheet) {
+ return;
+ }
+
+ if ("sf:tabular-model".equals(qName)) {
+ String tableName = attributes.getValue("sf:name");
+ xhtml.startElement("div");
+ xhtml.characters(tableName);
+ xhtml.endElement("div");
+ inTable = true;
+ xhtml.startElement("table");
+ xhtml.startElement("tr");
+ currentColumn = 0;
+ }
+
+ if ("sf:menu-choices".equals(qName)) {
+ menuItems = new HashMap<String, String>();
+ }
+
+ if (inTable && "sf:grid".equals(qName)) {
+ numberOfColumns = Integer.parseInt(attributes.getValue("sf:numcols"));
+ }
+
+ if (menuItems != null && "sf:t".equals(qName)) {
+ currentMenuItemId = attributes.getValue("sfa:ID");
+ }
+
+ if (currentMenuItemId != null && "sf:ct".equals(qName)) {
+ menuItems.put(currentMenuItemId, attributes.getValue("sfa:s"));
+ }
+
+ if (inTable && "sf:ct".equals(qName)) {
+ if (currentColumn >= numberOfColumns) {
+ currentColumn = 0;
+ xhtml.endElement("tr");
+ xhtml.startElement("tr");
+ }
+
+ xhtml.element("td", attributes.getValue("sfa:s"));
+ currentColumn++;
+ }
+
+ if (inTable && ("sf:n".equals(qName) || "sf:rn".equals(qName))) {
+ if (currentColumn >= numberOfColumns) {
+ currentColumn = 0;
+ xhtml.endElement("tr");
+ xhtml.startElement("tr");
+ }
+
+ xhtml.element("td", attributes.getValue("sf:v"));
+ currentColumn++;
+ }
+
+ if (inTable && "sf:proxied-cell-ref".equals(qName)) {
+ if (currentColumn >= numberOfColumns) {
+ currentColumn = 0;
+ xhtml.endElement("tr");
+ xhtml.startElement("tr");
+ }
+
+ xhtml.element("td", menuItems.get(attributes.getValue("sfa:IDREF")));
+ currentColumn++;
+ }
+
+ if ("sf:chart-name".equals(qName)) {
+ // Extract chart name:
+ xhtml.startElement("div", "class", "chart");
+ xhtml.startElement("h1");
+ xhtml.characters(attributes.getValue("sfa:string"));
+ xhtml.endElement("h1");
+ xhtml.endElement("div");
+ }
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ if (parseText && length > 0) {
+ xhtml.characters(ch, start, length);
+ }
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ if ("ls:workspace".equals(qName)) {
+ inSheet = false;
+ xhtml.endElement("div");
+ }
+
+ if ("sf:text".equals(qName)) {
+ inText = false;
+ xhtml.endElement("p");
+ }
+
+ if ("sf:p".equals(qName)) {
+ parseText = false;
+ }
+
+ if ("sf:metadata".equals(qName)) {
+ inMetadata = false;
+ }
+
+ if (inMetadata && qName.equals(metadataPropertyQName)) {
+ metadataPropertyQName = null;
+ metadataKey = null;
+ }
+
+ if (!inSheet) {
+ return;
+ }
+
+ if ("sf:menu-choices".equals(qName)) {
+ }
+
+ if ("sf:tabular-model".equals(qName)) {
+ inTable = false;
+ xhtml.endElement("tr");
+ xhtml.endElement("table");
+ }
+
+ if (currentMenuItemId != null && "sf:t".equals(qName)) {
+ currentMenuItemId = null;
+ }
+ }
+
+ private Property resolveMetadataKey(String localName) {
+ if ("authors".equals(localName)) {
+ return TikaCoreProperties.CREATOR;
+ }
+ if ("title".equals(localName)) {
+ return TikaCoreProperties.TITLE;
+ }
+ if ("comment".equals(localName)) {
+ return TikaCoreProperties.COMMENTS;
+ }
+ return Property.internalText(localName);
+ }
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
new file mode 100644
index 0000000..f3fd873
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.tika.parser.apple.BPListDetector
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
new file mode 100644
index 0000000..c922b2e
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.tika.parser.apple.AppleSingleFileParser
+org.apache.tika.parser.apple.PListParser
+org.apache.tika.parser.iwork.iwana.IWork13PackageParser
+org.apache.tika.parser.iwork.iwana.IWork18PackageParser
+org.apache.tika.parser.iwork.IWorkPackageParser
+
+
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/AutoPageNumberUtilsTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/AutoPageNumberUtilsTest.java
new file mode 100644
index 0000000..65e7121
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/AutoPageNumberUtilsTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.iwork;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.parser.iwork.AutoPageNumberUtils;
+import org.junit.Test;
+
+/**
+ * Test class for the <code>AutoPageNumberUtils</code> helper class.
+ */
+ public class AutoPageNumberUtilsTest {
+
+     /**
+      * Upper-case letter numbering: each expected label is compared with the
+      * value generated for the page number at the same index.
+      */
+     @Test
+     public void testAlphaUpper() {
+         final int[] pages = {1, 26, 27, 52, 53, 78};
+         final String[] labels = {"A", "Z", "AA", "ZZ", "AAA", "ZZZ"};
+         for (int i = 0; i < pages.length; i++) {
+             assertEquals(labels[i], AutoPageNumberUtils.asAlphaNumeric(pages[i]));
+         }
+     }
+
+     /**
+      * Lower-case letter numbering: each expected label is compared with the
+      * value generated for the page number at the same index.
+      */
+     @Test
+     public void testAlphaLower() {
+         final int[] pages = {1, 26, 27, 52, 53, 78};
+         final String[] labels = {"a", "z", "aa", "zz", "aaa", "zzz"};
+         for (int i = 0; i < pages.length; i++) {
+             assertEquals(labels[i], AutoPageNumberUtils.asAlphaNumericLower(pages[i]));
+         }
+     }
+
+     /**
+      * Upper-case Roman numerals: each expected numeral is compared with the
+      * value generated for the page number at the same index.
+      */
+     @Test
+     public void testRomanUpper() {
+         final int[] pages = {1, 26, 27};
+         final String[] numerals = {"I", "XXVI", "XXVII"};
+         for (int i = 0; i < pages.length; i++) {
+             assertEquals(numerals[i], AutoPageNumberUtils.asRomanNumerals(pages[i]));
+         }
+     }
+
+     /**
+      * Lower-case Roman numerals: each expected numeral is compared with the
+      * value generated for the page number at the same index.
+      */
+     @Test
+     public void testRomanLower() {
+         final int[] pages = {1, 26, 27};
+         final String[] numerals = {"i", "xxvi", "xxvii"};
+         for (int i = 0; i < pages.length; i++) {
+             assertEquals(numerals[i], AutoPageNumberUtils.asRomanNumeralsLower(pages[i]));
+         }
+     }
+
+ }
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
new file mode 100644
index 0000000..5c4d5d1
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
@@ -0,0 +1,392 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.iwork;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Tests if the IWork parser parses the content and metadata properly of the supported formats.
+ */
+ public class IWorkParserTest extends TikaTest {
+
+     private IWorkPackageParser iWorkParser;
+
+     /** Creates a fresh parser for every test. */
+     @Before
+     public void setUp() {
+         iWorkParser = new IWorkPackageParser();
+     }
+
+     /**
+      * Check the given InputStream is not closed by the Parser (TIKA-1117).
+      *
+      * @throws Exception
+      */
+     @Test
+     public void testStreamNotClosed() throws Exception {
+         // try-with-resources: the test owns the stream and must release it itself,
+         // precisely because the parser is expected to leave it open.
+         try (InputStream input = getResourceAsStream("/test-documents/testKeynote.key")) {
+             Metadata metadata = new Metadata();
+             ContentHandler handler = new BodyContentHandler();
+             iWorkParser.parse(input, handler, metadata);
+             input.read(); // Will throw an Exception if the stream was already closed.
+         }
+     }
+
+     /** Checks metadata, slide text and table cells extracted from a Keynote file. */
+     @Test
+     public void testParseKeynote() throws Exception {
+         Metadata metadata = new Metadata();
+         String content = getText("testKeynote.key", iWorkParser, metadata);
+
+         // Make sure enough keys came through
+         // (Exact numbers will vary based on composites)
+         assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 6);
+         List<String> metadataKeys = Arrays.asList(metadata.names());
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.SLIDE_COUNT.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
+
+         // Check the metadata values
+         assertEquals("application/vnd.apple.keynote", metadata.get(Metadata.CONTENT_TYPE));
+         assertEquals("3", metadata.get(Office.SLIDE_COUNT));
+         assertEquals("1024", metadata.get(KeynoteContentHandler.PRESENTATION_WIDTH));
+         assertEquals("768", metadata.get(KeynoteContentHandler.PRESENTATION_HEIGHT));
+         assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR));
+         assertEquals("Apache tika", metadata.get(TikaCoreProperties.TITLE));
+
+         assertContains("A sample presentation", content);
+         assertContains("For the Apache Tika project", content);
+         assertContains("Slide 1", content);
+         assertContains("Some random text for the sake of testability.", content);
+         assertContains("A nice comment", content);
+         assertContains("A nice note", content);
+
+         // test table data
+         assertContains("Cell one", content);
+         assertContains("Cell two", content);
+         assertContains("Cell three", content);
+         assertContains("Cell four", content);
+         assertContains("Cell 5", content);
+         assertContains("Cell six", content);
+         assertContains("7", content);
+         assertContains("Cell eight", content);
+         assertContains("5/5/1985", content);
+     }
+
+     // TIKA-910
+     @Test
+     public void testKeynoteTextBoxes() throws Exception {
+         String content = getText("testTextBoxes.key", iWorkParser);
+         assertTrue(content.replaceAll("\\s+", " ").contains("text1 text2 text3"));
+     }
+
+     // TIKA-910
+     @Test
+     public void testKeynoteBulletPoints() throws Exception {
+         String content = getText("testBulletPoints.key", iWorkParser);
+         assertTrue(content.replaceAll("\\s+", " ").contains("bullet point 1 bullet point 2 bullet point 3"));
+     }
+
+     // TIKA-923
+     @Test
+     public void testKeynoteTables() throws Exception {
+         String content = getText("testTables.key", iWorkParser);
+         content = content.replaceAll("\\s+", " ");
+         assertContains("row 1 row 2 row 3", content);
+     }
+
+     // TIKA-923
+     @Test
+     public void testKeynoteMasterSlideTable() throws Exception {
+         String content = getText("testMasterSlideTable.key", iWorkParser);
+         content = content.replaceAll("\\s+", " ");
+         assertContains("master row 1", content);
+         assertContains("master row 2", content);
+         assertContains("master row 3", content);
+     }
+
+     /** Checks metadata and the text of both pages extracted from a Pages file. */
+     @Test
+     public void testParsePages() throws Exception {
+         Metadata metadata = new Metadata();
+         String content = getText("testPages.pages", iWorkParser, metadata);
+         // Make sure enough keys came through
+         // (Exact numbers will vary based on composites)
+         assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 50);
+         List<String> metadataKeys = Arrays.asList(metadata.names());
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.PAGE_COUNT.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LAST_MODIFIED.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.LANGUAGE.getName()));
+
+         // Check the metadata values
+         assertEquals("application/vnd.apple.pages", metadata.get(Metadata.CONTENT_TYPE));
+         assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR));
+         assertEquals("Apache tika", metadata.get(TikaCoreProperties.TITLE));
+         assertEquals("2010-05-09T21:34:38+0200", metadata.get(TikaCoreProperties.CREATED));
+         assertEquals("2010-05-09T23:50:36+0200", metadata.get(TikaCoreProperties.MODIFIED));
+         assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE));
+         assertEquals("2", metadata.get(Office.PAGE_COUNT));
+
+         // text on page 1
+         assertContains("Sample pages document", content);
+         assertContains("Some plain text to parse.", content);
+         assertContains("Cell one", content);
+         assertContains("Cell two", content);
+         assertContains("Cell three", content);
+         assertContains("Cell four", content);
+         assertContains("Cell five", content);
+         assertContains("Cell six", content);
+         assertContains("Cell seven", content);
+         assertContains("Cell eight", content);
+         assertContains("Cell nine", content);
+         assertContains("Both Pages 1.x and Keynote 2.x", content); // ...
+
+         // text on page 2
+         assertContains("A second page....", content);
+         assertContains("Extensible Markup Language", content); // ...
+     }
+
+     // TIKA-904
+     @Test
+     public void testPagesLayoutMode() throws Exception {
+         String content = getText("testPagesLayout.pages");
+         assertContains("text box 1 - here is some text", content);
+         assertContains("created in a text box in layout mode", content);
+         assertContains("text box 2 - more text!@!$@#", content);
+         assertContains("this is text inside of a green box", content);
+         assertContains("text inside of a green circle", content);
+     }
+
+     /** Checks metadata and cell contents extracted from a Numbers file. */
+     @Test
+     public void testParseNumbers() throws Exception {
+         Metadata metadata = new Metadata();
+         String content = getText("testNumbers.numbers", iWorkParser, metadata);
+
+         // Make sure enough keys came through
+         // (Exact numbers will vary based on composites)
+         assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 8);
+         List<String> metadataKeys = Arrays.asList(metadata.names());
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.PAGE_COUNT.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.COMMENTS.getName()));
+         assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
+
+         // Check the metadata values
+         assertEquals("2", metadata.get(Office.PAGE_COUNT));
+         assertEquals("Tika User", metadata.get(TikaCoreProperties.CREATOR));
+         assertEquals("Account checking", metadata.get(TikaCoreProperties.TITLE));
+         assertEquals("a comment", metadata.get(TikaCoreProperties.COMMENTS));
+
+         assertContains("Category", content);
+         assertContains("Home", content);
+         assertContains("-226", content);
+         assertContains("-137.5", content);
+         assertContains("Checking Account: 300545668", content);
+         assertContains("4650", content);
+         assertContains("Credit Card", content);
+         assertContains("Groceries", content);
+         assertContains("-210", content);
+         assertContains("Food", content);
+         assertContains("Try adding your own account transactions to this table.", content);
+     }
+
+     // TIKA-924
+     @Test
+     public void testParseNumbersTableNames() throws Exception {
+         String content = getText("tableNames.numbers", iWorkParser);
+         assertContains("This is the main table", content);
+     }
+
+     /** Checks that header cells and row labels of a Numbers table are extracted. */
+     @Test
+     public void testParseNumbersTableHeaders() throws Exception {
+         String content = getText("tableHeaders.numbers");
+         for (int header = 1; header <= 5; header++) {
+             assertContains("header" + header, content);
+         }
+         for (int row = 1; row <= 3; row++) {
+             assertContains("row" + row, content);
+         }
+     }
+
+     /**
+      * We don't currently support password protected Pages files, as
+      * we don't know how the encryption works (it's not regular Zip
+      * Encryption). See TIKA-903 for details
+      */
+     @Test
+     public void testParsePagesPasswordProtected() throws Exception {
+         // Document password is "tika", but we can't use that yet...
+         Metadata metadata = new Metadata();
+         String content = getText("testPagesPwdProtected.pages", iWorkParser, metadata);
+         assertEquals("", content);
+
+         // Will have been identified as encrypted
+         assertEquals("application/x-tika-iworks-protected", metadata.get(Metadata.CONTENT_TYPE));
+     }
+
+     /**
+      * Check we get headers, footers and footnotes from Pages
+      */
+     @Test
+     public void testParsePagesHeadersFootersFootnotes() throws Exception {
+         String footnote = "Footnote: Do a lot of people really use iWork?!?!";
+         String header = "THIS IS SOME HEADER TEXT";
+         String footer = "THIS IS SOME FOOTER TEXT\t1";
+         String footer2 = "THIS IS SOME FOOTER TEXT\t2";
+
+         String content = getText("testPagesHeadersFootersFootnotes.pages", iWorkParser);
+
+         // Check regular text
+         assertContains("Both Pages 1.x", content); // P1
+         assertContains("understanding the Pages document", content); // P1
+         assertContains("should be page 2", content); // P2
+
+         // Check for headers, footers and footnotes
+         assertContains(header, content);
+         assertContains(footer, content);
+         assertContains(footer2, content);
+         assertContains(footnote, content);
+     }
+
+     /**
+      * Check we get upper-case Roman numerals within the footer for AutoPageNumber.
+      */
+     @Test
+     public void testParsePagesHeadersFootersRomanUpper() throws Exception {
+         String header = "THIS IS SOME HEADER TEXT";
+         String footer = "THIS IS SOME FOOTER TEXT\tI";
+         String footer2 = "THIS IS SOME FOOTER TEXT\tII";
+
+         String content = getText("testPagesHeadersFootersRomanUpper.pages", iWorkParser);
+
+         // Check for headers, footers and footnotes
+         assertContains(header, content);
+         assertContains(footer, content);
+         assertContains(footer2, content);
+     }
+
+     /**
+      * Check we get lower-case Roman numerals within the footer for AutoPageNumber.
+      */
+     @Test
+     public void testParsePagesHeadersFootersRomanLower() throws Exception {
+         String header = "THIS IS SOME HEADER TEXT";
+         String footer = "THIS IS SOME FOOTER TEXT\ti";
+         String footer2 = "THIS IS SOME FOOTER TEXT\tii";
+
+         String content = getText("testPagesHeadersFootersRomanLower.pages", iWorkParser);
+
+         // Check for headers, footers and footnotes
+         assertContains(header, content);
+         assertContains(footer, content);
+         assertContains(footer2, content);
+     }
+
+     /**
+      * Check we get upper-case alpha-numeric letters within the footer for AutoPageNumber.
+      */
+     @Test
+     public void testParsePagesHeadersAlphaUpper() throws Exception {
+         String header = "THIS IS SOME HEADER TEXT\tA";
+         String footer = "THIS IS SOME FOOTER TEXT\tA";
+         String footer2 = "THIS IS SOME FOOTER TEXT\tB";
+
+         String content = getText("testPagesHeadersFootersAlphaUpper.pages", iWorkParser);
+
+         // Check for headers, footers and footnotes
+         assertContains(header, content);
+         assertContains(footer, content);
+         assertContains(footer2, content);
+     }
+
+     /**
+      * Check we get lower-case alpha-numeric letters within the footer for AutoPageNumber.
+      */
+     @Test
+     public void testParsePagesHeadersAlphaLower() throws Exception {
+         String header = "THIS IS SOME HEADER TEXT";
+         String footer = "THIS IS SOME FOOTER TEXT\ta";
+         String footer2 = "THIS IS SOME FOOTER TEXT\tb";
+
+         String content = getText("testPagesHeadersFootersAlphaLower.pages", iWorkParser);
+
+         // Check for headers, footers and footnotes
+         assertContains(header, content);
+         assertContains(footer, content);
+         assertContains(footer2, content);
+     }
+
+     /**
+      * Check we get annotations (eg comments) from Pages
+      */
+     @Test
+     public void testParsePagesAnnotations() throws Exception {
+         String commentA = "comment about the APXL file";
+         String commentB = "comment about UIMA";
+
+         String content = getText("testPagesComments.pages", iWorkParser);
+
+         // Check regular text
+         assertContains("Both Pages 1.x", content); // P1
+         assertContains("understanding the Pages document", content); // P1
+         assertContains("should be page 2", content); // P2
+
+         // Check for comments
+         assertContains(commentA, content);
+         assertContains(commentB, content);
+     }
+
+     // TIKA-918
+     @Test
+     public void testNumbersExtractChartNames() throws Exception {
+         String content = getText("testNumbersCharts.numbers");
+         assertContains("Expenditure by Category", content);
+         assertContains("Currency Chart name", content);
+         assertContains("Chart 2", content);
+     }
+
+     // TIKA-3020
+     @Test
+     public void testKeyNoteTableMarkup() throws Exception {
+         String expected = "<table><tr>\t<td>Cell one</td>\t<td>Cell two</td>\t<td>Cell three</td></tr>" +
+                 "<tr>\t<td>Cell four</td>\t<td>Cell 5</td>\t<td>Cell six</td></tr>" +
+                 "<tr>\t<td>7</td>\t<td>Cell eight</td>\t<td>5/5/1985</td></tr>" +
+                 "</table>";
+         String xml = getXML("testKeynote.key", iWorkParser).xml;
+         // Line breaks are removed so the markup can be matched as one string.
+         xml = xml.replaceAll("[\r\n]", "");
+         assertContains(expected, xml);
+     }
+ }
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testAppleSingleFile.pdf b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testAppleSingleFile.pdf
new file mode 100644
index 0000000..a407ded
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testAppleSingleFile.pdf differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.key b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.key
new file mode 100644
index 0000000..d0dd416
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.key differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testMasterSlideTable.key b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testMasterSlideTable.key
new file mode 100644
index 0000000..2627770
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testMasterSlideTable.key differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers.numbers b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers.numbers
new file mode 100644
index 0000000..51360e0
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers.numbers differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers2013.numbers b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers2013.numbers
new file mode 100644
index 0000000..3f9a013
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testNumbers2013.numbers differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPages2013.pages b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPages2013.pages
new file mode 100644
index 0000000..b82ac7a
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPages2013.pages differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesComments.pages b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesComments.pages
new file mode 100644
index 0000000..d7ff81c
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesComments.pages differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesHeadersFootersFootnotes.pages b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesHeadersFootersFootnotes.pages
new file mode 100644
index 0000000..cfecc8c
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesHeadersFootersFootnotes.pages differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesPwdProtected.pages b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesPwdProtected.pages
new file mode 100644
index 0000000..788b516
Binary files /dev/null and b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testPagesPwdProtected.pages differ
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testWEBARCHIVE.webarchive b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testWEBARCHIVE.webarchive
new file mode 100644
index 0000000..b78643a
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-apple-module/src/test/resources/test-documents/testWEBARCHIVE.webarchive
@@ -0,0 +1,646 @@
+bplist00�_WebMainResource_WebSubresources�
+
_WebResourceData_WebResourceMIMEType_WebResourceTextEncodingName_WebResourceFrameName^WebResourceURLOP�<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+--><html xmlns="http://www.w3.org/1999/xhtml"><head>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+ <title>Apache Tika - Apache Tika</title>
+ <style type="text/css" media="all">
+ @import url("./css/site.css");
+ </style>
+ <link rel="icon" type="image/png" href="./tikaNoText16.png">
+ <script type="text/javascript">
+ function selectProvider(form) {
+ provider = form.elements['searchProvider'].value;
+ if (provider == "any") {
+ if (Math.random() > 0.5) {
+ provider = "lucid";
+ } else {
+ provider = "sl";
+ }
+ }
+ if (provider == "lucid") {
+ form.action = "http://search.lucidimagination.com/p:tika";
+ } else if (provider == "sl") {
+ form.action = "http://search-lucene.com/tika";
+ }
+ days = 90;
+ date = new Date();
+ date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
+ expires = "; expires=" + date.toGMTString();
+ document.cookie = "searchProvider=" + provider + expires + "; path=/";
+ }
+ function initProvider() {
+ if (document.cookie.length>0) {
+ cStart=document.cookie.indexOf("searchProvider=");
+ if (cStart!=-1) {
+ cStart=cStart + "searchProvider=".length;
+ cEnd=document.cookie.indexOf(";", cStart);
+ if (cEnd==-1) {
+ cEnd=document.cookie.length;
+ }
+ provider = unescape(document.cookie.substring(cStart,cEnd));
+ document.forms['searchform'].elements['searchProvider'].value = provider;
+ }
+ }
+ document.forms['searchform'].elements['q'].focus();
+ }
+ </script>
+ </head>
+ <body onload="initProvider();">
+ <div id="body">
+ <div id="banner">
+ <a href="http://tika.apache.org" id="bannerLeft" title="Apache Tika"><img src="http://tika.apache.org/tika.png" alt="Apache Tika" width="292" height="100"></a>
+ <a href="http://www.apache.org/" id="bannerRight" title="The Apache Software Foundation"><img src="http://tika.apache.org/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100"></a>
+ </div>
+ <div id="content">
+ <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements. See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License. You may obtain a copy of the License at --><!-- --><!-- http://www.apache.org/ [...]
+ </div>
+ <div id="sidebar">
+ <div id="navigation">
+ <h5>Apache Tika</h5>
+ <ul>
+
+ <li class="none">
+ <strong>Introduction</strong>
+ </li>
+
+ <li class="none">
+ <a href="download.html">Download</a>
+ </li>
+
+ <li class="none">
+ <a href="mail-lists.html">Mailing Lists</a>
+ </li>
+
+ <li class="none">
+ <a href="http://wiki.apache.org/tika/" class="externalLink">Tika Wiki</a>
+ </li>
+
+ <li class="none">
+ <a href="https://issues.apache.org/jira/browse/TIKA" class="externalLink">Issue Tracker</a>
+ </li>
+ </ul>
+ <h5>Documentation</h5>
+ <ul>
+
+
+
+
+
+
+
+
+
+ <li class="expanded">
+ <a href="1.0/index.html">Apache Tika 1.0</a>
+ <ul>
+
+ <li class="none">
+ <a href="1.0/gettingstarted.html">Getting Started</a>
+ </li>
+
+ <li class="none">
+ <a href="1.0/formats.html">Supported Formats</a>
+ </li>
+
+ <li class="none">
+ <a href="1.0/parser.html">Parser API</a>
+ </li>
+
+ <li class="none">
+ <a href="1.0/parser_guide.html">Parser 5min Quick Start Guide</a>
+ </li>
+
+ <li class="none">
+ <a href="1.0/detection.html">Content and Language Detection</a>
+ </li>
+
+ <li class="none">
+ <a href="1.0/api/">API Documentation</a>
+ </li>
+ </ul>
+ </li>
+
+
+
+
+
+
+
+
+
+ <li class="collapsed">
+ <a href="0.10/index.html">Apache Tika 0.10</a>
+ </li>
+
+
+
+
+
+
+
+
+
+ <li class="collapsed">
+ <a href="0.9/index.html">Apache Tika 0.9</a>
+ </li>
+
+
+
+
+
+
+
+
+
+ <li class="collapsed">
+ <a href="0.8/index.html">Apache Tika 0.8</a>
+ </li>
+ </ul>
+ <h5>The Apache Software Foundation</h5>
+ <ul>
+
+ <li class="none">
+ <a href="http://www.apache.org/foundation/" class="externalLink">About</a>
+ </li>
+
+ <li class="none">
+ <a href="http://www.apache.org/licenses/" class="externalLink">License</a>
+ </li>
+
+ <li class="none">
+ <a href="http://www.apache.org/security/" class="externalLink">Security</a>
+ </li>
+
+ <li class="none">
+ <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">Sponsorship</a>
+ </li>
+
+ <li class="none">
+ <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">Thanks</a>
+ </li>
+ </ul>
+
+ <div id="search">
+ <h5>Search with Apache Solr</h5>
+ <form action="http://search.lucidimagination.com/p:tika" method="get" id="searchform">
+ <input type="text" id="query" name="q">
+ <select name="searchProvider" id="searchProvider">
+ <option value="any">provider</option>
+ <option value="lucid">Lucid Find</option>
+ <option value="sl">Search-Lucene</option>
+ </select>
+ <input type="submit" id="submit" value="Search" name="Search" onclick="selectProvider(this.form)">
+ </form>
+ </div>
+
+ <div id="bookpromo">
+ <h5>Books about Tika</h5>
+ <p>
+ <a href="http://manning.com/mattmann/" title="Tika in Action"><img src="./mattmann_cover150.jpg" width="150" height="186"></a>
+ </p>
+ </div>
+ </div>
+ </div>
+ <div id="footer">
+ <p>
+ Copyright © 2011
+ <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+ Site powered by <a href="http://maven.apache.org/">Apache Maven</a>.
+ Search powered by
+ <a href="http://www.lucidimagination.com">Lucid Imagination</a>
+ and <a href="http://sematext.com">Sematext</a>.
+ <br>
+ Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
+ Tika project logo are trademarks of The Apache Software Foundation.
+ </p>
+ </div>
+ </div>
+
+
+</body></html>Ytext/htmlUUTF-8P_http://tika.apache.org/�$�_WebResourceResponseO�/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+body {
+ font-family: serif;
+ font-size: 13pt;
+ background-color: #eee;
+ margin: 0;
+ padding: 0;
+}
+
+#body {
+ width: 800px;
+ height: 100%;
+ margin: 20px auto;
+ left: auto;
+ right: auto;
+ background-color: white;
+ padding: 20px;
+ border: 1px solid #CCC;
+ -moz-border-radius: 15px;
+ border-radius: 15px;
+ -moz-box-shadow: 1ex 1ex 1ex #666;
+ -webkit-box-shadow: 1ex 1ex 1ex #666;
+ box-shadow: 5px 5px 5px #666;
+}
+
+#banner {
+ height: 100px;
+ padding-bottom: 1em;
+ border-bottom: 1px solid #eee;
+}
+
+#bannerLeft {
+ float: left;
+}
+
+#bannerRight {
+ float: right;
+}
+
+#content {
+ width: 600px;
+ float: left;
+ line-height: 1.3em;
+}
+
+#navigation {
+ width: 180px;
+ float: right;
+ font-size: 12px;
+}
+
+#navigation h5 {
+ font-size: 12px;
+ margin-bottom: 1ex;
+}
+
+#navigation ul {
+ margin: 0;
+ padding: 0;
+}
+
+#navigation li {
+ list-style-type: none;
+ list-style-position: inside;
+}
+
+#navigation li ul {
+ margin-left: 20px;
+}
+
+#navigation li.expanded {
+ list-style-type: disc;
+}
+
+#navigation li.collapsed {
+ list-style-type: circle;
+}
+
+#navigation strong {
+ font-weight: normal;
+}
+
+#navigation a {
+ text-decoration: none;
+}
+
+#navigation form {
+ text-align: right;
+}
+
+#query {
+ width: 100%;
+ border: 1px solid #eee;
+}
+
+#searchProvider, #submit {
+ width: 48%;
+}
+
+#bookpromo p {
+ text-align: center;
+}
+
+#footer {
+ clear: both;
+ border-top: 1px solid #eee;
+ font-size: 8pt;
+ color: gray;
+ text-align: center;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ font-family: sans-serif;
+ color: #900;
+}
+
+li {
+ margin-top: 2px;
+}
+
+a:link {
+ color: #36a;
+}
+a:visited {
+ color:#47a;
+}
+a:active, a:hover {
+ color:#69c;
+}
+a.externalLink {
+ background: url(../images/external.png) right center no-repeat;
+ padding-right: 18px;
+}
+
+img {
+ border: 0;
+}
+
+pre {
+ border: 1px solid #ccc;
+ background-color: #eee;
+ padding: 1ex;
+ overflow: auto;
+}
+
+/* From maven-theme.css */
+
+table.bodyTable th {
+ color: white;
+ background-color: #bbb;
+ text-align: left;
+ font-weight: bold;
+}
+
+table.bodyTable th, table.bodyTable td {
+ font-size: 1em;
+}
+
+table.bodyTable tr.a {
+ background-color: #ddd;
+}
+
+table.bodyTable tr.b {
+ background-color: #eee;
+}
+
+dt {
+ color: #900;
+ font-weight: bold;
+}
+dd {
+ margin-bottom: 1ex;
+}
+
+.errormark, .warningmark, .donemark, .infomark {
+ background: url(../images/icon_error_sml.gif) no-repeat;
+}
+
+.warningmark {
+ background-image: url(../images/icon_warning_sml.gif);
+}
+
+.donemark {
+ background-image: url(../images/icon_success_sml.gif);
+}
+
+.infomark {
+ background-image: url(../images/icon_info_sml.gif);
+}
+
+/* From maven-base.css */
+
+table {
+ padding:0px;
+ width: 100%;
+ margin-left: -2px;
+ margin-right: -2px;
+}
+acronym {
+ cursor: help;
+ border-bottom: 1px dotted #feb;
+}
+table.bodyTable th, table.bodyTable td {
+ padding: 2px 4px 2px 4px;
+ vertical-align: top;
+}
+
+Xtext/css_#http://tika.apache.org/css/site.cssO�bplist00�noX$versionX$objectsY$archiverT$top ���""()012NOPQRSTUVWXYZ[\]^_`abcdhiU$null�
+
!R$6S$10R$2R$7R$3S$11R$8V$classR$4R$9R$0R$5R$1��� � �!�� ��#$%&[NS.relativeWNS.base��� _#http://tika.apache.org/css/site.css�*+,-Z$classnameX$classesUNSURL�./UNSURLXNSObject#A���6���3456BWNS.keysZNS.objects��789:;<=>?@A� �
+���
�������CDEFGHIJKLM�����������TVaryVServerZConnection]Last-Modified\Content-Type]Accept-RangesTDate_Content-Encoding^Content-LengthZKeep-AliveTEtag_Accept-Encoding_:Apache/2.3.15-dev (Unix) mod_ssl/2.3.15-dev OpenSSL/1.0.0cZKeep-Alive_Sun, 31 Oct 2010 21:50:22 GMTXtext/cssUbytes_Tue, 13 Dec 2011 18:55:14 GMTTgzipT1418_timeout=5, max=100_"d1b503-eaa-493f0adabe380-gzip"�*+ef_NSMutableDictionary�eg/\NSDictionary��*+jk_NSHTTPURLResponse�lm/_NSHTTPURLRespon [...]
+
+
IHDR Ӻ& gAMA ��7�� tEXtSoftware Adobe ImageReadyq�e<