You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/19 21:25:31 UTC
[52/52] [partial] tika git commit: move test files to parser-modules
move test files to parser-modules
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/38916f89
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/38916f89
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/38916f89
Branch: refs/heads/2.x-test-doc-reshuffle
Commit: 38916f89c179ec9d2fc3e425edc4b87e1470167e
Parents: 18d0285
Author: tballison <ta...@mitre.org>
Authored: Fri Feb 19 15:23:10 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Fri Feb 19 15:24:06 2016 -0500
----------------------------------------------------------------------
pom.xml | 1 -
tika-parser-modules/pom.xml | 11 +-
.../tika/parser/ner/opennlp/ModelGetter.groovy | 93 +
.../tika/parser/ner/opennlp/get-models.sh | 26 +
.../apache/tika/parser/ner/regex/ner-regex.txt | 17 +
.../org/apache/tika/parser/ner/tika-config.xml | 27 +
.../resources/test-documents/testCADKEY.prt | Bin 0 -> 10246 bytes
.../resources/test-documents/testCADKEY2.prt | Bin 0 -> 41664 bytes
.../resources/test-documents/testDWG2000.dwg | Bin 0 -> 675048 bytes
.../resources/test-documents/testDWG2004.dwg | Bin 0 -> 39335 bytes
.../test-documents/testDWG2004_no_header.dwg | Bin 0 -> 38178 bytes
.../resources/test-documents/testDWG2007.dwg | Bin 0 -> 73088 bytes
.../resources/test-documents/testDWG2010.dwg | Bin 0 -> 59562 bytes
.../test-documents/testDWG2010_custom_props.dwg | Bin 0 -> 73791 bytes
.../test-documents/testDWGmech2004.dwg | Bin 0 -> 439438 bytes
.../test-documents/testDWGmech2004DX.dwg | Bin 0 -> 439563 bytes
.../test-documents/testDWGmech2005.dwg | Bin 0 -> 439627 bytes
.../test-documents/testDWGmech2006.dwg | Bin 0 -> 439982 bytes
.../test-documents/testDWGmech2007.dwg | Bin 0 -> 479904 bytes
.../test-documents/testDWGmech2008.dwg | Bin 0 -> 487456 bytes
.../test-documents/testDWGmech2009.dwg | Bin 0 -> 483840 bytes
.../test-documents/testDWGmech2010.dwg | Bin 0 -> 467023 bytes
.../test-documents/testDWGmech2011.dwg | Bin 0 -> 466891 bytes
.../resources/test-documents/testDWGmech6.dwg | Bin 0 -> 687882 bytes
.../test/resources/test-documents/testCPP.cpp | 14 +
.../resources/test-documents/testGROOVY.groovy | 9 +
.../test/resources/test-documents/testJAVA.java | 14 +
.../resources/test-documents/testLinux-x86-32 | Bin 0 -> 7175 bytes
.../test-documents/testWindows-x86-32.exe | Bin 0 -> 11723 bytes
.../resources/test-documents/testSqlite3b.db | Bin 0 -> 27648 bytes
.../test/resources/test-documents/testEPUB.epub | Bin 0 -> 29719 bytes
.../resources/test-documents/testiBooks.ibooks | Bin 0 -> 970636 bytes
.../test-documents/testJournalParser.pdf | Bin 0 -> 985125 bytes
.../src/test/resources/test-documents/test2.swf | Bin 0 -> 42534 bytes
.../test/resources/test-documents/testAFM.afm | 50 +
.../test/resources/test-documents/testAIFF.aif | Bin 0 -> 3894 bytes
.../src/test/resources/test-documents/testAU.au | Bin 0 -> 3868 bytes
.../test/resources/test-documents/testBMP.bmp | Bin 0 -> 22554 bytes
.../test/resources/test-documents/testBPG.bpg | Bin 0 -> 1824 bytes
.../resources/test-documents/testBPG_GEO.bpg | Bin 0 -> 2042 bytes
.../test-documents/testBPG_commented.bpg | Bin 0 -> 10281 bytes
.../testBPG_commented_xnviewmp026.bpg | Bin 0 -> 12374 bytes
.../test/resources/test-documents/testFLV.flv | Bin 0 -> 90580 bytes
.../test/resources/test-documents/testGIF.gif | Bin 0 -> 8495 bytes
.../test/resources/test-documents/testJPEG.jpg | Bin 0 -> 7686 bytes
.../resources/test-documents/testJPEG_EXIF.jpg | Bin 0 -> 16357 bytes
.../testJPEG_EXIF_emptyDateTime.jpg | Bin 0 -> 24597 bytes
.../resources/test-documents/testJPEG_GEO.jpg | Bin 0 -> 16482 bytes
.../resources/test-documents/testJPEG_GEO_2.jpg | Bin 0 -> 20844 bytes
.../test-documents/testJPEG_commented.jpg | Bin 0 -> 13325 bytes
.../testJPEG_commented_pspcs2mac.jpg | Bin 0 -> 26173 bytes
.../testJPEG_commented_xnviewmp026.jpg | Bin 0 -> 13910 bytes
.../test-documents/testJPEG_oddTagComponent.jpg | Bin 0 -> 8330 bytes
.../test/resources/test-documents/testMID.mid | Bin 0 -> 322 bytes
.../resources/test-documents/testMP3i18n.mp3 | Bin 0 -> 40832 bytes
.../resources/test-documents/testMP3id3v1.mp3 | Bin 0 -> 39416 bytes
.../test-documents/testMP3id3v1_v2.mp3 | Bin 0 -> 40960 bytes
.../resources/test-documents/testMP3id3v2.mp3 | Bin 0 -> 39577 bytes
.../resources/test-documents/testMP3id3v24.mp3 | Bin 0 -> 39471 bytes
.../resources/test-documents/testMP3lyrics.mp3 | Bin 0 -> 34688 bytes
.../resources/test-documents/testMP3noid3.mp3 | Bin 0 -> 39288 bytes
.../test-documents/testMP3truncated.mp3 | Bin 0 -> 65536 bytes
.../test/resources/test-documents/testMP4.m4a | Bin 0 -> 4770 bytes
.../test-documents/testNakedUTF16BOM.mp3 | Bin 0 -> 2625 bytes
.../test/resources/test-documents/testOCR.docx | Bin 0 -> 62041 bytes
.../test/resources/test-documents/testOCR.jpg | Bin 0 -> 3408 bytes
.../test/resources/test-documents/testOCR.pdf | Bin 0 -> 41936 bytes
.../test/resources/test-documents/testOCR.pptx | Bin 0 -> 78550 bytes
.../test/resources/test-documents/testPNG.png | Bin 0 -> 17041 bytes
.../test/resources/test-documents/testPSD.psd | Bin 0 -> 69410 bytes
.../test/resources/test-documents/testPSD2.psd | Bin 0 -> 31315 bytes
.../test-documents/testRFC822-multipart | 111 +
.../test/resources/test-documents/testTIFF.tif | Bin 0 -> 25584 bytes
.../resources/test-documents/testTrueType3.ttf | Bin 0 -> 224592 bytes
.../test/resources/test-documents/testWAV.wav | Bin 0 -> 3884 bytes
.../test/resources/test-documents/testWEBP.webp | Bin 0 -> 3442 bytes
.../test-documents/testWebp_Alpha_Lossless.webp | Bin 0 -> 92312 bytes
.../test-documents/testWebp_Alpha_Lossy.webp | Bin 0 -> 23404 bytes
.../TesseractOCRConfig-full.properties | 22 +
.../TesseractOCRConfig-partial.properties | 18 +
.../test/resources/test-documents/Doc1_ole.doc | Bin 0 -> 89600 bytes
.../test-documents/EmbeddedDocument.docx | Bin 0 -> 13219 bytes
.../test-documents/EmbeddedOutlook.docx | Bin 0 -> 113242 bytes
.../resources/test-documents/EmbeddedPDF.docx | Bin 0 -> 99389 bytes
.../resources/test-documents/NullHeader.docx | Bin 0 -> 4355 bytes
.../resources/test-documents/chm/IMJPCL.CHM | Bin 0 -> 757069 bytes
.../resources/test-documents/chm/IMJPCLE.CHM | Bin 0 -> 256718 bytes
.../resources/test-documents/chm/IMTCEN.CHM | Bin 0 -> 452547 bytes
.../test/resources/test-documents/chm/admin.chm | Bin 0 -> 49749 bytes
.../resources/test-documents/chm/cmak_ops.CHM | Bin 0 -> 82895 bytes
.../resources/test-documents/chm/comexp.CHM | Bin 0 -> 109882 bytes
.../resources/test-documents/chm/gpedit.CHM | Bin 0 -> 49537 bytes
.../test/resources/test-documents/chm/tcpip.CHM | Bin 0 -> 33186 bytes
.../resources/test-documents/chm/wmicontrol.CHM | Bin 0 -> 32096 bytes
.../test/resources/test-documents/complex.mbox | 291 +
.../resources/test-documents/footnotes.docx | Bin 0 -> 12823 bytes
.../resources/test-documents/headerPic.docx | Bin 0 -> 16206 bytes
.../test/resources/test-documents/headers.mbox | 7 +
.../src/test/resources/test-documents/jxl.xls | Bin 0 -> 614912 bytes
.../resources/test-documents/multiline.mbox | 5 +
.../test/resources/test-documents/pictures.ppt | Bin 0 -> 75776 bytes
.../test/resources/test-documents/protect.xlsx | Bin 0 -> 12968 bytes
.../resources/test-documents/protectedFile.xlsx | Bin 0 -> 12968 bytes
.../test-documents/protectedSheets.xlsx | Bin 0 -> 11236 bytes
.../test/resources/test-documents/quoted.mbox | 4 +
.../test/resources/test-documents/simple.mbox | 7 +
.../resources/test-documents/test-outlook.msg | Bin 0 -> 19968 bytes
.../test-documents/test-outlook2003.msg | Bin 0 -> 83968 bytes
.../src/test/resources/test-documents/test.doc | Bin 0 -> 9216 bytes
.../resources/test-documents/testAccess2.accdb | Bin 0 -> 794624 bytes
.../test-documents/testAccess2_2000.mdb | Bin 0 -> 421888 bytes
.../test-documents/testAccess2_2002-2003.mdb | Bin 0 -> 417792 bytes
.../test-documents/testAccess2_encrypted.accdb | Bin 0 -> 557056 bytes
.../test-documents/testAccess_V1997.mdb | Bin 0 -> 118784 bytes
.../test-documents/testBinControlWord.rtf | 2 +
.../test/resources/test-documents/testChm.chm | Bin 0 -> 186259 bytes
.../test/resources/test-documents/testChm2.chm | Bin 0 -> 10807437 bytes
.../test/resources/test-documents/testChm3.chm | Bin 0 -> 900481 bytes
.../test-documents/testControlCharacters.doc | Bin 0 -> 448000 bytes
.../test-documents/testDOCX_Thumbnail.docx | Bin 0 -> 13810 bytes
.../test-documents/testDocumentLink.doc | Bin 0 -> 812032 bytes
.../test-documents/testEXCEL-charts.xls | Bin 0 -> 15360 bytes
.../test-documents/testEXCEL-formats.xls | Bin 0 -> 13824 bytes
.../test-documents/testEXCEL-formats.xlsx | Bin 0 -> 8303 bytes
.../test-documents/testEXCEL.strict.xlsx | Bin 0 -> 10006 bytes
.../test/resources/test-documents/testEXCEL.xls | Bin 0 -> 13824 bytes
.../resources/test-documents/testEXCEL.xlsb | Bin 0 -> 9161 bytes
.../resources/test-documents/testEXCEL.xlsx | Bin 0 -> 9453 bytes
.../resources/test-documents/testEXCEL_1img.xls | Bin 0 -> 20992 bytes
.../test-documents/testEXCEL_1img.xlsx | Bin 0 -> 14552 bytes
.../resources/test-documents/testEXCEL_4.xls | Bin 0 -> 39942 bytes
.../resources/test-documents/testEXCEL_5.xls | Bin 0 -> 7168 bytes
.../resources/test-documents/testEXCEL_95.xls | Bin 0 -> 20992 bytes
.../test-documents/testEXCEL_custom_props.xls | Bin 0 -> 17408 bytes
.../test-documents/testEXCEL_custom_props.xlsx | Bin 0 -> 9230 bytes
.../test-documents/testEXCEL_embeded.xls | Bin 0 -> 303104 bytes
.../test-documents/testEXCEL_embeded.xlsx | Bin 0 -> 348405 bytes
.../testEXCEL_headers_footers.xls | Bin 0 -> 33792 bytes
.../testEXCEL_headers_footers.xlsx | Bin 0 -> 11740 bytes
.../testEXCEL_protected_passtika.xls | Bin 0 -> 17408 bytes
.../testEXCEL_protected_passtika.xlsx | Bin 0 -> 12800 bytes
.../test-documents/testEXCEL_textbox.xlsx | Bin 0 -> 11017 bytes
.../resources/test-documents/testException1.doc | Bin 0 -> 49152 bytes
.../resources/test-documents/testException2.doc | Bin 0 -> 58368 bytes
.../testFontAfterBufferedText.rtf | 7 +
.../resources/test-documents/testFooter.ods | Bin 0 -> 7207 bytes
.../resources/test-documents/testFooter.odt | Bin 0 -> 8381 bytes
.../test/resources/test-documents/testMSG.msg | Bin 0 -> 20480 bytes
.../test-documents/testMSG_att_doc.msg | Bin 0 -> 52224 bytes
.../test-documents/testMSG_att_msg.msg | Bin 0 -> 71680 bytes
.../test-documents/testMSG_chinese.msg | Bin 0 -> 48129 bytes
.../test-documents/testMSG_forwarded.msg | Bin 0 -> 25600 bytes
.../test-documents/testMasterFooter.odp | Bin 0 -> 13975 bytes
.../test-documents/testNPEOpenDocument.odt | Bin 0 -> 18304 bytes
.../test-documents/testODFwithOOo3.odt | Bin 0 -> 24286 bytes
.../test-documents/testODT-TIKA-6000.odt | Bin 0 -> 3888830 bytes
.../test-documents/testOpenOffice2.odf | Bin 0 -> 10977 bytes
.../test-documents/testOpenOffice2.odt | Bin 0 -> 26448 bytes
.../test/resources/test-documents/testPPM.ppm | 4 +
.../test/resources/test-documents/testPPT.potm | Bin 0 -> 40102 bytes
.../test/resources/test-documents/testPPT.ppsm | Bin 0 -> 36545 bytes
.../test/resources/test-documents/testPPT.ppsx | Bin 0 -> 36521 bytes
.../test/resources/test-documents/testPPT.ppt | Bin 0 -> 16384 bytes
.../test/resources/test-documents/testPPT.pptm | Bin 0 -> 36541 bytes
.../test/resources/test-documents/testPPT.pptx | Bin 0 -> 36518 bytes
.../test/resources/test-documents/testPPT.thmx | Bin 0 -> 42485 bytes
.../test/resources/test-documents/testPPT.xps | Bin 0 -> 75442 bytes
.../test-documents/testPPTX_Thumbnail.pptx | Bin 0 -> 42580 bytes
.../resources/test-documents/testPPT_2imgs.pptx | Bin 0 -> 59246 bytes
.../test-documents/testPPT_autodate.ppt | Bin 0 -> 148992 bytes
.../test-documents/testPPT_autodate.pptx | Bin 0 -> 47707 bytes
.../test-documents/testPPT_comment.ppt | Bin 0 -> 86016 bytes
.../test-documents/testPPT_comment.pptx | Bin 0 -> 30939 bytes
.../test-documents/testPPT_custom_props.ppt | Bin 0 -> 104960 bytes
.../test-documents/testPPT_custom_props.pptx | Bin 0 -> 37864 bytes
.../test-documents/testPPT_embedded2.ppt | Bin 0 -> 92160 bytes
.../testPPT_embedded_two_slides.pptx | Bin 0 -> 255364 bytes
.../test-documents/testPPT_embeded.ppt | Bin 0 -> 224768 bytes
.../test-documents/testPPT_embeded.pptx | Bin 0 -> 202969 bytes
.../test-documents/testPPT_masterFooter.ppt | Bin 0 -> 139776 bytes
.../test-documents/testPPT_masterFooter.pptx | Bin 0 -> 35128 bytes
.../test-documents/testPPT_masterText.ppt | Bin 0 -> 117760 bytes
.../test-documents/testPPT_masterText.pptx | Bin 0 -> 32270 bytes
.../test-documents/testPPT_masterText2.ppt | Bin 0 -> 102912 bytes
.../test-documents/testPPT_masterText2.pptx | Bin 0 -> 32291 bytes
.../testPPT_protected_passtika.ppt | Bin 0 -> 43008 bytes
.../testPPT_protected_passtika.pptx | Bin 0 -> 41472 bytes
.../test-documents/testPPT_various.ppt | Bin 0 -> 160768 bytes
.../test-documents/testPPT_various.pptx | Bin 0 -> 56659 bytes
.../test-documents/testPROJECT2003.mpp | Bin 0 -> 125440 bytes
.../test-documents/testPROJECT2007.mpp | Bin 0 -> 147968 bytes
.../test/resources/test-documents/testPST.pst | Bin 0 -> 271360 bytes
.../resources/test-documents/testPUBLISHER.pub | Bin 0 -> 65536 bytes
.../resources/test-documents/testRTF-ms932.rtf | 30 +
.../test/resources/test-documents/testRTF.rtf | 17 +
.../test-documents/testRTFBoldItalic.rtf | 164 +
.../test-documents/testRTFControls.rtf | 165 +
.../testRTFCorruptListOverride.rtf | 95 +
.../test-documents/testRTFEmbeddedFiles.rtf | 6856 ++++++++++++++++++
.../test-documents/testRTFEmbeddedLink.rtf | 1438 ++++
.../testRTFHexEscapeInsideWord.rtf | 4 +
.../test-documents/testRTFHyperlink.rtf | 598 ++
.../testRTFIgnoredControlWord.rtf | 17 +
.../test-documents/testRTFInvalidUnicode.rtf | 11 +
.../test-documents/testRTFJapanese.rtf | 87 +
.../test-documents/testRTFListLibreOffice.rtf | 67 +
.../test-documents/testRTFListMicrosoftWord.rtf | 227 +
.../test-documents/testRTFListOverride.rtf | 424 ++
.../test-documents/testRTFNewlines.rtf | 27 +
.../test-documents/testRTFRegularImages.rtf | 1241 ++++
.../testRTFTableCellSeparation.rtf | 7 +
.../testRTFTableCellSeparation2.rtf | 3 +
.../test-documents/testRTFUmlautSpaces.rtf | 3 +
.../test-documents/testRTFUmlautSpaces2.rtf | 8 +
.../test-documents/testRTFUnicodeGothic.rtf | 5 +
...TFUnicodeUCNControlWordCharacterDoubling.rtf | 8 +
.../resources/test-documents/testRTFVarious.rtf | 329 +
.../testRTFWindowsCodepage1250.rtf | 5 +
.../test-documents/testRTFWithCurlyBraces.rtf | 44 +
.../testRTFWord2010CzechCharacters.rtf | 190 +
.../testRTFWordPadCzechCharacters.rtf | 5 +
.../resources/test-documents/testStyles.odt | Bin 0 -> 11663 bytes
.../test/resources/test-documents/testVISIO.vsd | Bin 0 -> 45568 bytes
.../resources/test-documents/testWINMAIL.dat | Bin 0 -> 66276 bytes
.../test/resources/test-documents/testWORD.doc | Bin 0 -> 32768 bytes
.../test/resources/test-documents/testWORD6.doc | Bin 0 -> 6656 bytes
.../resources/test-documents/testWORD_1img.doc | Bin 0 -> 14848 bytes
.../resources/test-documents/testWORD_1img.docx | Bin 0 -> 8325 bytes
.../resources/test-documents/testWORD_3imgs.doc | Bin 0 -> 36352 bytes
.../test-documents/testWORD_3imgs.docx | Bin 0 -> 31303 bytes
.../testWORD_bold_character_runs.doc | Bin 0 -> 22016 bytes
.../testWORD_bold_character_runs.docx | Bin 0 -> 12912 bytes
.../testWORD_bold_character_runs2.doc | Bin 0 -> 22016 bytes
.../testWORD_bold_character_runs2.docx | Bin 0 -> 12863 bytes
.../testWORD_closingSmartQInHyperLink.doc | Bin 0 -> 26624 bytes
.../test-documents/testWORD_custom_props.doc | Bin 0 -> 22528 bytes
.../test-documents/testWORD_custom_props.docx | Bin 0 -> 13942 bytes
.../test-documents/testWORD_embedded_pdf.doc | Bin 0 -> 1491456 bytes
.../test-documents/testWORD_embedded_pdf.docx | Bin 0 -> 63294 bytes
.../test-documents/testWORD_embedded_rtf.doc | Bin 0 -> 16384 bytes
.../test-documents/testWORD_embeded.doc | Bin 0 -> 319488 bytes
.../test-documents/testWORD_embeded.docx | Bin 0 -> 157830 bytes
.../testWORD_header_hyperlink.doc | Bin 0 -> 22528 bytes
.../testWORD_missing_ooxml_bean1.docx | Bin 0 -> 17913 bytes
.../test-documents/testWORD_missing_text.docx | Bin 0 -> 31592 bytes
.../test-documents/testWORD_multi_authors.doc | Bin 0 -> 22528 bytes
.../test-documents/testWORD_multi_authors.docx | Bin 0 -> 12054 bytes
.../test-documents/testWORD_no_format.doc | Bin 0 -> 74752 bytes
.../test-documents/testWORD_no_format.docx | Bin 0 -> 37018 bytes
.../test-documents/testWORD_null_style.docx | Bin 0 -> 29018 bytes
.../test-documents/testWORD_numbered_list.doc | Bin 0 -> 44032 bytes
.../test-documents/testWORD_numbered_list.docx | Bin 0 -> 24696 bytes
.../testWORD_override_list_numbering.doc | Bin 0 -> 56320 bytes
.../testWORD_override_list_numbering.docx | Bin 0 -> 15746 bytes
.../testWORD_protected_passtika.docx | Bin 0 -> 14336 bytes
.../test-documents/testWORD_tabular_symbol.doc | Bin 0 -> 10240 bytes
.../test-documents/testWORD_text_box.docx | Bin 0 -> 25271 bytes
.../test-documents/testWORD_various.doc | Bin 0 -> 35328 bytes
.../test-documents/testWORD_various.docx | Bin 0 -> 19169 bytes
.../test-documents/testWORKSSpreadsheet7.0.xlr | Bin 0 -> 10752 bytes
.../resources/test-documents/testWordArt.pptx | Bin 0 -> 37792 bytes
.../test-documents/testXLSX_Thumbnail.xlsx | Bin 0 -> 10318 bytes
.../resources/test-documents/test_TIKA-1251.doc | Bin 0 -> 50688 bytes
.../test-documents/test_embedded_package.rtf | 71 +
.../test-documents/test_embedded_zip.pptx | Bin 0 -> 345027 bytes
.../testsolidworksAssembly2013SP2.SLDASM | Bin 0 -> 209408 bytes
.../testsolidworksAssembly2014SP0.SLDASM | Bin 0 -> 238080 bytes
.../testsolidworksDrawing2013SP2.SLDDRW | Bin 0 -> 180224 bytes
.../testsolidworksDrawing2014SP0.SLDDRW | Bin 0 -> 201216 bytes
.../testsolidworksPart2013SP2.SLDPRT | Bin 0 -> 1010176 bytes
.../testsolidworksPart2014SP0.SLDPRT | Bin 0 -> 1043456 bytes
.../src/test/resources/test-documents/moby.zip | Bin 0 -> 606033 bytes
.../test-documents/tableHeaders.numbers | Bin 0 -> 89554 bytes
.../resources/test-documents/tableNames.numbers | Bin 0 -> 88246 bytes
.../resources/test-documents/test-documents.7z | Bin 0 -> 66817 bytes
.../resources/test-documents/test-documents.rar | Bin 0 -> 67945 bytes
.../resources/test-documents/test-documents.tar | Bin 0 -> 133120 bytes
.../test-documents/test-documents.tar.Z | Bin 0 -> 103647 bytes
.../test-documents/test-documents.tbz2 | Bin 0 -> 71127 bytes
.../resources/test-documents/test-documents.tgz | Bin 0 -> 69060 bytes
.../resources/test-documents/test-documents.zip | Bin 0 -> 68403 bytes
.../test-documents/test7Z_protected_passTika.7z | Bin 0 -> 260 bytes
.../resources/test-documents/testARofSND.ar | Bin 0 -> 3936 bytes
.../resources/test-documents/testARofText.ar | 5 +
.../test-documents/testBulletPoints.key | Bin 0 -> 213830 bytes
.../resources/test-documents/testEmbedded.zip | Bin 0 -> 340 bytes
.../resources/test-documents/testKeynote.key | Bin 0 -> 221745 bytes
.../test-documents/testMasterSlideTable.key | Bin 0 -> 220184 bytes
.../test-documents/testNumbers.numbers | Bin 0 -> 134571 bytes
.../test-documents/testNumbersCharts.numbers | Bin 0 -> 104547 bytes
.../test-documents/testOpenOffice2.odt | Bin 0 -> 26448 bytes
.../resources/test-documents/testPages.pages | Bin 0 -> 134152 bytes
.../test-documents/testPagesComments.pages | Bin 0 -> 154546 bytes
.../testPagesHeadersFootersAlphaLower.pages | Bin 0 -> 168501 bytes
.../testPagesHeadersFootersAlphaUpper.pages | Bin 0 -> 168995 bytes
.../testPagesHeadersFootersFootnotes.pages | Bin 0 -> 177328 bytes
.../testPagesHeadersFootersRomanLower.pages | Bin 0 -> 103923 bytes
.../testPagesHeadersFootersRomanUpper.pages | Bin 0 -> 174197 bytes
.../test-documents/testPagesLayout.pages | Bin 0 -> 66480 bytes
.../test-documents/testPagesPwdProtected.pages | Bin 0 -> 33166 bytes
.../test/resources/test-documents/testSVG.svgz | Bin 0 -> 222 bytes
.../test/resources/test-documents/testTXT.zlib | Bin 0 -> 55 bytes
.../resources/test-documents/testTables.key | Bin 0 -> 216497 bytes
.../resources/test-documents/testTextBoxes.key | Bin 0 -> 208981 bytes
.../test-documents/testAnnotations.pdf | Bin 0 -> 18580 bytes
.../test-documents/testExtraSpaces.pdf | Bin 0 -> 20743 bytes
.../test-documents/testOverlappingText.pdf | Bin 0 -> 899 bytes
.../test-documents/testPDF-custommetadata.pdf | Bin 0 -> 7495 bytes
.../test/resources/test-documents/testPDF.pdf | Bin 0 -> 34824 bytes
.../testPDFEmbeddingAndEmbedded.docx | Bin 0 -> 34139 bytes
.../testPDFFileEmbInAnnotation.pdf | Bin 0 -> 97211 bytes
.../resources/test-documents/testPDFPackage.pdf | Bin 0 -> 92359 bytes
.../test-documents/testPDFTripleLangTitle.pdf | Bin 0 -> 1719 bytes
.../test-documents/testPDFTwoTextBoxes.pdf | Bin 0 -> 57100 bytes
.../resources/test-documents/testPDFVarious.pdf | Bin 0 -> 205491 bytes
.../testPDF_PDFEncodedStringInXMP.pdf | Bin 0 -> 7210 bytes
.../test-documents/testPDF_Version.10.x.pdf | Bin 0 -> 5811 bytes
.../testPDF_Version.11.x.PDFA-1b.pdf | Bin 0 -> 23081 bytes
.../test-documents/testPDF_Version.4.x.pdf | Bin 0 -> 10007 bytes
.../test-documents/testPDF_Version.5.x.pdf | Bin 0 -> 5953 bytes
.../test-documents/testPDF_Version.6.x.pdf | Bin 0 -> 5903 bytes
.../test-documents/testPDF_Version.7.x.pdf | Bin 0 -> 5903 bytes
.../test-documents/testPDF_Version.8.x.pdf | Bin 0 -> 5903 bytes
.../test-documents/testPDF_Version.9.x.pdf | Bin 0 -> 5998 bytes
.../test-documents/testPDF_acroform3.pdf | Bin 0 -> 26746 bytes
.../resources/test-documents/testPDF_bom.pdf | Bin 0 -> 7645 bytes
.../test-documents/testPDF_bookmarks.pdf | Bin 0 -> 9487 bytes
.../test-documents/testPDF_childAttachments.pdf | Bin 0 -> 2318262 bytes
.../testPDF_multiFormatEmbFiles.pdf | Bin 0 -> 2662 bytes
..._no_extract_no_accessibility_owner_empty.pdf | 87 +
...F_no_extract_no_accessibility_owner_user.pdf | 87 +
...no_extract_yes_accessibility_owner_empty.pdf | 87 +
..._no_extract_yes_accessibility_owner_user.pdf | 87 +
.../test-documents/testPDF_protected.pdf | Bin 0 -> 506064 bytes
.../test-documents/testPDF_twoAuthors.pdf | Bin 0 -> 12628 bytes
.../test-documents/WFPC2u5780205r_c0fx.fits | 1814 +++++
.../resources/test-documents/Zamora2010.dif | 169 +
.../breidamerkurjokull_radar_profiles_2009.mat | Bin 0 -> 14748772 bytes
.../test-documents/envi_test_header.hdr | 16 +
.../gdas1.forecmwf.2014062612.grib2 | Bin 0 -> 2489194 bytes
.../test-documents/sampleFile.iso19139 | 453 ++
.../sresa1b_ncar_ccsm3_0_run1_200001.nc | Bin 0 -> 2767916 bytes
.../src/test/resources/test-documents/test.hdf | Bin 0 -> 542529 bytes
.../src/test/resources/test-documents/test.he5 | Bin 0 -> 1396916 bytes
...-2_metabolite profiling_NMR spectroscopy.txt | 51 +
.../testISATab_BII-I-1/a_metabolome.txt | 112 +
.../testISATab_BII-I-1/a_microarray.txt | 15 +
.../testISATab_BII-I-1/a_proteome.txt | 19 +
.../testISATab_BII-I-1/a_transcriptome.txt | 49 +
.../testISATab_BII-I-1/i_investigation.txt | 164 +
.../testISATab_BII-I-1/s_BII-S-1.txt | 165 +
.../testISATab_BII-I-1/s_BII-S-2.txt | 15 +
.../resources/test-documents/test_mat_text.mat | Bin 0 -> 183 bytes
.../resources/test-documents/english.cp500.txt | 1 +
.../test/resources/test-documents/resume.html | 73 +
.../resources/test-documents/russian.cp866.txt | 6 +
.../src/test/resources/test-documents/test.fb2 | 350 +
.../test-documents/testOCTET_header.dbase3 | Bin 0 -> 194 bytes
.../test/resources/test-documents/testXML.xml | 48 +
.../test/resources/test-documents/testXML2.xml | 1 +
.../test/resources/test-documents/testXML3.xml | 23 +
.../StringsConfig-full.properties | 18 +
.../StringsConfig-partial.properties | 16 +
.../resources/test-documents/big-preamble.html | 827 +++
.../test-documents/boilerplate-whitespace.html | 27 +
.../resources/test-documents/boilerplate.html | 41 +
.../test/resources/test-documents/rsstest.rss | 36 +
.../test/resources/test-documents/testATOM.atom | 27 +
.../test/resources/test-documents/testHTML.html | 28 +
.../testHTMLNoisyMetaEncoding_1.html | 77 +
.../testHTMLNoisyMetaEncoding_2.html | 77 +
.../testHTMLNoisyMetaEncoding_3.html | 77 +
.../testHTMLNoisyMetaEncoding_4.html | 77 +
.../test/resources/test-documents/testRFC822 | 41 +
.../test-documents/testRFC822-limitedheaders | 9 +
.../test-documents/testRFC822-multipart | 111 +
.../resources/test-documents/testRFC822_base64 | 8 +
.../test-documents/testRFC822_encrypted_zip | 61 +
.../test-documents/testRFC822_i18nheaders | 9 +
.../test-documents/testRFC822_normal_zip | 61 +
.../resources/test-documents/testRFC822_oddfrom | 2105 ++++++
.../resources/test-documents/testRFC822_quoted | 13 +
.../test-documents/testUserDefinedCharset.mhtml | 21 +
.../resources/test-documents/testXHTML.html | 29 +
.../test/resources/test-documents/tika434.html | 914 +++
tika-parsers/pom.xml | 10 +-
.../parser/fork/ForkParserIntegrationTest.java | 2 +-
.../src/test/resources/log4j.properties | 24 +
.../apache/tika/config/TIKA-1558-blacklist.xml | 29 +
.../tika/config/TIKA-1558-blacklistsub.xml | 24 +
.../config/TIKA-1702-detector-blacklist.xml | 31 +
.../config/TIKA-1702-translator-default.xml | 24 +
.../TIKA-1702-translator-empty-default.xml | 22 +
.../tika/config/TIKA-1702-translator-empty.xml | 20 +
.../config/TIKA-1708-detector-composite.xml | 25 +
.../tika/config/TIKA-1708-detector-default.xml | 26 +
.../org/apache/tika/mime/custom-mimetypes.xml | 23 +
.../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb | Bin 0 -> 1362900 bytes
.../resources/test-documents/NUTCH-1997.cbor | 30 +
.../test/resources/test-documents/TIKA-216.tgz | Bin 0 -> 1270 bytes
...tive_layer_arcss_grid_barrow_alaska_2012.dif | 61 +
...lues_of_alkanes_extracted_from_paleosols.dif | 84 +
.../test-documents/egyl03.gdas.200811.00Z.grb2 | Bin 0 -> 3447292 bytes
.../test-documents/mock/embedded_then_npe.xml | 36 +
.../resources/test-documents/mock/example.xml | 51 +
.../resources/test-documents/mock/fake_oom.xml | 25 +
.../test-documents/mock/heavy_hang.xml | 25 +
.../test-documents/mock/nothing_bad.xml | 26 +
.../test-documents/mock/null_pointer.xml | 25 +
.../test-documents/mock/null_pointer_no_msg.xml | 25 +
.../resources/test-documents/mock/real_oom.xml | 24 +
.../resources/test-documents/mock/sleep.xml | 25 +
.../test-documents/mock/sleep_interruptible.xml | 25 +
.../mock/sleep_not_interruptible.xml | 25 +
.../test-documents/test-documents-spanned.z01 | Bin 0 -> 65536 bytes
.../test-documents/test-documents-spanned.zip | Bin 0 -> 3488 bytes
.../test-documents/test-documents.cpio | Bin 0 -> 116224 bytes
.../test-documents/test-zip-of-zip.zip | Bin 0 -> 299 bytes
.../src/test/resources/test-documents/test1.swf | Bin 0 -> 21054 bytes
.../src/test/resources/test-documents/test3.swf | Bin 0 -> 51562 bytes
.../resources/test-documents/testACCESS.mdb | Bin 0 -> 110592 bytes
.../resources/test-documents/testAMR-WB.amr | Bin 0 -> 3609 bytes
.../test/resources/test-documents/testAMR.amr | Bin 0 -> 3620 bytes
.../test/resources/test-documents/testAPK.apk | Bin 0 -> 11740 bytes
.../test/resources/test-documents/testASF.asf | Bin 0 -> 62439 bytes
.../resources/test-documents/testASiCE.asice | Bin 0 -> 2916 bytes
.../resources/test-documents/testASiCS.asics | Bin 0 -> 2705 bytes
.../resources/test-documents/testBDB_btree_2.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_3.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_4.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_btree_5.db | Bin 0 -> 8192 bytes
.../resources/test-documents/testBDB_hash_2.db | Bin 0 -> 12288 bytes
.../resources/test-documents/testBDB_hash_3.db | Bin 0 -> 12288 bytes
.../resources/test-documents/testBDB_hash_4.db | Bin 0 -> 12288 bytes
.../resources/test-documents/testBDB_hash_5.db | Bin 0 -> 12288 bytes
.../resources/test-documents/testBIBTEX.bib | 21 +
.../test/resources/test-documents/testBMPfp.txt | 3 +
.../src/test/resources/test-documents/testC.c | 6 +
.../test/resources/test-documents/testCOREL.shw | Bin 0 -> 77824 bytes
.../test/resources/test-documents/testCSS.css | 48 +
.../test/resources/test-documents/testCSV.csv | 23 +
.../resources/test-documents/testComment.doc | Bin 0 -> 22528 bytes
.../resources/test-documents/testComment.docx | Bin 0 -> 11019 bytes
.../resources/test-documents/testComment.pdf | Bin 0 -> 68398 bytes
.../resources/test-documents/testComment.ppt | Bin 0 -> 101376 bytes
.../resources/test-documents/testComment.pptx | Bin 0 -> 34979 bytes
.../resources/test-documents/testComment.rtf | 169 +
.../resources/test-documents/testComment.xls | Bin 0 -> 23040 bytes
.../resources/test-documents/testComment.xlsx | Bin 0 -> 9692 bytes
.../test/resources/test-documents/testDITA.dita | 34 +
.../resources/test-documents/testDITA.ditamap | 23 +
.../resources/test-documents/testDITA2.dita | 33 +
.../test/resources/test-documents/testDOTM.dotm | Bin 0 -> 65527 bytes
.../resources/test-documents/testDetached.p7s | Bin 0 -> 2941 bytes
.../test/resources/test-documents/testEAR.ear | Bin 0 -> 1086 bytes
.../test/resources/test-documents/testEMF.emf | Bin 0 -> 60400 bytes
.../test/resources/test-documents/testEMLX.emlx | 55 +
.../test/resources/test-documents/testFITS.fits | 5 +
.../test/resources/test-documents/testFLAC.flac | Bin 0 -> 10604 bytes
.../test/resources/test-documents/testFLAC.oga | Bin 0 -> 10820 bytes
.../resources/test-documents/testFOXMAIL.box | 1327 ++++
.../resources/test-documents/testFreeBSD-x86-64 | Bin 0 -> 6369 bytes
.../test-documents/testGroupWiseEml.eml | 58 +
.../src/test/resources/test-documents/testH.h | 5 +
.../resources/test-documents/testHTML_utf8.html | 25 +
.../resources/test-documents/testHWP_3.0.hwp | Bin 0 -> 9287 bytes
.../resources/test-documents/testHWP_5.0.hwp | Bin 0 -> 16384 bytes
.../test/resources/test-documents/testINDD.indd | Bin 0 -> 880640 bytes
.../test/resources/test-documents/testIPA.ipa | Bin 0 -> 163803 bytes
.../test/resources/test-documents/testJAR.jar | Bin 0 -> 441 bytes
.../test-documents/testJAR_with_HTML.jar | Bin 0 -> 5594 bytes
.../test-documents/testJAR_with_PEHDR.jar | Bin 0 -> 35343 bytes
.../test-documents/testJAVAPROPS.properties | 22 +
.../resources/test-documents/testJNILIB.jnilib | Bin 0 -> 35332 bytes
.../test/resources/test-documents/testJPEG.jp2 | Bin 0 -> 25725 bytes
.../src/test/resources/test-documents/testJS.js | 53 +
.../test/resources/test-documents/testKML.kml | 917 +++
.../test/resources/test-documents/testKMZ.kmz | Bin 0 -> 8106 bytes
.../resources/test-documents/testLinux-arm-32le | Bin 0 -> 5517 bytes
.../test-documents/testLinux-mips-32be | Bin 0 -> 8125 bytes
.../test-documents/testLinux-mips-32le | Bin 0 -> 38051 bytes
.../resources/test-documents/testLinux-ppc-32be | Bin 0 -> 248480 bytes
.../resources/test-documents/testLinux-x86-64 | Bin 0 -> 8377 bytes
.../resources/test-documents/testLotusEml.eml | 71 +
.../test/resources/test-documents/testMATLAB.m | 4 +
.../test-documents/testMATLAB_barcast.m | 383 +
.../test-documents/testMATLAB_wtsgaus.m | 52 +
.../test-documents/testMHTMLFirefox.mhtml | 455 ++
.../test/resources/test-documents/testMKV.mkv | Bin 0 -> 82969 bytes
.../test/resources/test-documents/testMYSQL.MYD | Bin 0 -> 24 bytes
.../test/resources/test-documents/testMYSQL.MYI | Bin 0 -> 1024 bytes
.../test/resources/test-documents/testMYSQL.frm | Bin 0 -> 8594 bytes
.../test/resources/test-documents/testOPUS.opus | Bin 0 -> 1059 bytes
.../test-documents/testOptionalHyphen.doc | Bin 0 -> 22016 bytes
.../test-documents/testOptionalHyphen.docx | Bin 0 -> 10382 bytes
.../test-documents/testOptionalHyphen.pdf | Bin 0 -> 44954 bytes
.../test-documents/testOptionalHyphen.ppt | Bin 0 -> 100864 bytes
.../test-documents/testOptionalHyphen.pptx | Bin 0 -> 33173 bytes
.../test-documents/testOptionalHyphen.rtf | 158 +
.../test/resources/test-documents/testPBM.pbm | 3 +
.../test/resources/test-documents/testPGM.pgm | 4 +
.../test/resources/test-documents/testPICT.pct | Bin 0 -> 23454 bytes
.../resources/test-documents/testPPT_2imgs.ppt | Bin 0 -> 124928 bytes
.../resources/test-documents/testPageNumber.pdf | Bin 0 -> 52020 bytes
.../test-documents/testPhoneNumberExtractor.odt | Bin 0 -> 15244 bytes
.../test-documents/testPopupAnnotation.pdf | Bin 0 -> 9081 bytes
.../resources/test-documents/testQUATTRO.qpw | Bin 0 -> 4608 bytes
.../resources/test-documents/testQUATTRO.wb3 | Bin 0 -> 5120 bytes
.../test/resources/test-documents/testRDF.rdf | 23 +
.../resources/test-documents/testRFC822-CC-BCC | 44 +
.../resources/test-documents/testRFC822-big | 199 +
.../resources/test-documents/testSQLITE3.db | Bin 0 -> 2048 bytes
.../test/resources/test-documents/testSVG.svg | 7 +
.../resources/test-documents/testSolaris-x86-32 | Bin 0 -> 6404 bytes
.../test-documents/testStarOffice-5.2-calc.sdc | Bin 0 -> 17408 bytes
.../test-documents/testStarOffice-5.2-draw.sda | Bin 0 -> 29184 bytes
.../testStarOffice-5.2-impress.sdd | Bin 0 -> 29184 bytes
.../testStarOffice-5.2-writer.sdw | Bin 0 -> 8192 bytes
.../resources/test-documents/testTXT-tika.axx | Bin 0 -> 334 bytes
.../test/resources/test-documents/testTXT.txt | 2 +
.../test/resources/test-documents/testTXT.zlib0 | Bin 0 -> 58 bytes
.../test/resources/test-documents/testTXT.zlib5 | Bin 0 -> 55 bytes
.../test/resources/test-documents/testTXT.zlib9 | Bin 0 -> 55 bytes
.../test-documents/testTXTNonASCIIUTF8.txt | 7 +
.../test-documents/testThunderbirdEml.eml | 32 +
.../resources/test-documents/testTinyPE.exe | Bin 0 -> 1024 bytes
.../resources/test-documents/testVISIO.vsdm | Bin 0 -> 32360 bytes
.../resources/test-documents/testVISIO.vsdx | Bin 0 -> 32350 bytes
.../resources/test-documents/testVISIO.vssm | Bin 0 -> 32358 bytes
.../resources/test-documents/testVISIO.vssx | Bin 0 -> 32349 bytes
.../resources/test-documents/testVISIO.vstm | Bin 0 -> 32361 bytes
.../resources/test-documents/testVISIO.vstx | Bin 0 -> 32350 bytes
.../resources/test-documents/testVORBIS.ogg | Bin 0 -> 4241 bytes
.../test-documents/testVORCalcTemplate.vor | Bin 0 -> 17408 bytes
.../test-documents/testVORDrawTemplate.vor | Bin 0 -> 29696 bytes
.../test-documents/testVORImpressTemplate.vor | Bin 0 -> 30208 bytes
.../test-documents/testVORWriterTemplate.vor | Bin 0 -> 8192 bytes
.../test/resources/test-documents/testWAR.war | Bin 0 -> 1003 bytes
.../test-documents/testWEBARCHIVE.webarchive | 646 ++
.../test/resources/test-documents/testWEBM.webm | Bin 0 -> 39745 bytes
.../test/resources/test-documents/testWMA.wma | Bin 0 -> 27747 bytes
.../test/resources/test-documents/testWMF.wmf | Bin 0 -> 51590 bytes
.../test/resources/test-documents/testWMV.wmv | Bin 0 -> 113878 bytes
.../test/resources/test-documents/testWORD.docx | Bin 0 -> 13436 bytes
.../testWORD_protected_passtika.doc | Bin 0 -> 22016 bytes
.../test/resources/test-documents/testWORKS.wps | Bin 0 -> 9728 bytes
.../resources/test-documents/testWORKS2000.wps | Bin 0 -> 5120 bytes
.../testWORKSWordProcessor3.0.wps | Bin 0 -> 3072 bytes
.../testWORKSWordProcessor4.0.wps | Bin 0 -> 3584 bytes
.../resources/test-documents/testWebVTT.vtt | 33 +
.../test-documents/test_list_override.rtf | 21 +
.../test-documents/test_recursive_embedded.docx | Bin 0 -> 27082 bytes
.../test_recursive_embedded_npe.docx | Bin 0 -> 27817 bytes
tika-test-resources/pom.xml | 90 -
.../src/test/resources/log4j.properties | 24 -
.../apache/tika/config/TIKA-1558-blacklist.xml | 29 -
.../tika/config/TIKA-1558-blacklistsub.xml | 24 -
.../config/TIKA-1702-detector-blacklist.xml | 31 -
.../config/TIKA-1702-translator-default.xml | 24 -
.../TIKA-1702-translator-empty-default.xml | 22 -
.../tika/config/TIKA-1702-translator-empty.xml | 20 -
.../config/TIKA-1708-detector-composite.xml | 25 -
.../tika/config/TIKA-1708-detector-default.xml | 26 -
.../org/apache/tika/mime/custom-mimetypes.xml | 23 -
.../tika/parser/ner/opennlp/ModelGetter.groovy | 93 -
.../tika/parser/ner/opennlp/get-models.sh | 26 -
.../apache/tika/parser/ner/regex/ner-regex.txt | 17 -
.../org/apache/tika/parser/ner/tika-config.xml | 27 -
.../test-documents/AutoDetectParser.class | Bin 3794 -> 0 bytes
.../test/resources/test-documents/Doc1_ole.doc | Bin 89600 -> 0 bytes
.../test-documents/EmbeddedDocument.docx | Bin 13219 -> 0 bytes
.../test-documents/EmbeddedOutlook.docx | Bin 113242 -> 0 bytes
.../resources/test-documents/EmbeddedPDF.docx | Bin 99389 -> 0 bytes
.../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb | Bin 1362900 -> 0 bytes
.../resources/test-documents/NUTCH-1997.cbor | 30 -
.../resources/test-documents/NullHeader.docx | Bin 4355 -> 0 bytes
.../test/resources/test-documents/TIKA-216.tgz | Bin 1270 -> 0 bytes
.../test-documents/WFPC2u5780205r_c0fx.fits | 1814 -----
.../resources/test-documents/Zamora2010.dif | 169 -
...tive_layer_arcss_grid_barrow_alaska_2012.dif | 61 -
.../resources/test-documents/big-preamble.html | 827 ---
.../test-documents/boilerplate-whitespace.html | 27 -
.../resources/test-documents/boilerplate.html | 41 -
.../breidamerkurjokull_radar_profiles_2009.mat | Bin 14748772 -> 0 bytes
...lues_of_alkanes_extracted_from_paleosols.dif | 84 -
.../resources/test-documents/chm/IMJPCL.CHM | Bin 757069 -> 0 bytes
.../resources/test-documents/chm/IMJPCLE.CHM | Bin 256718 -> 0 bytes
.../resources/test-documents/chm/IMTCEN.CHM | Bin 452547 -> 0 bytes
.../test/resources/test-documents/chm/admin.chm | Bin 49749 -> 0 bytes
.../resources/test-documents/chm/cmak_ops.CHM | Bin 82895 -> 0 bytes
.../resources/test-documents/chm/comexp.CHM | Bin 109882 -> 0 bytes
.../resources/test-documents/chm/gpedit.CHM | Bin 49537 -> 0 bytes
.../test/resources/test-documents/chm/tcpip.CHM | Bin 33186 -> 0 bytes
.../resources/test-documents/chm/wmicontrol.CHM | Bin 32096 -> 0 bytes
.../test/resources/test-documents/complex.mbox | 291 -
.../test-documents/egyl03.gdas.200811.00Z.grb2 | Bin 3447292 -> 0 bytes
.../resources/test-documents/english.cp500.txt | 1 -
.../test-documents/envi_test_header.hdr | 16 -
.../resources/test-documents/footnotes.docx | Bin 12823 -> 0 bytes
.../gdas1.forecmwf.2014062612.grib2 | Bin 2489194 -> 0 bytes
.../resources/test-documents/headerPic.docx | Bin 16206 -> 0 bytes
.../test/resources/test-documents/headers.mbox | 7 -
.../src/test/resources/test-documents/jxl.xls | Bin 614912 -> 0 bytes
.../src/test/resources/test-documents/moby.zip | Bin 606033 -> 0 bytes
.../test-documents/mock/embedded_then_npe.xml | 36 -
.../resources/test-documents/mock/example.xml | 51 -
.../resources/test-documents/mock/fake_oom.xml | 25 -
.../test-documents/mock/heavy_hang.xml | 25 -
.../test-documents/mock/nothing_bad.xml | 26 -
.../test-documents/mock/null_pointer.xml | 25 -
.../test-documents/mock/null_pointer_no_msg.xml | 25 -
.../resources/test-documents/mock/real_oom.xml | 24 -
.../resources/test-documents/mock/sleep.xml | 25 -
.../test-documents/mock/sleep_interruptible.xml | 25 -
.../mock/sleep_not_interruptible.xml | 25 -
.../resources/test-documents/multiline.mbox | 5 -
.../test/resources/test-documents/pictures.ppt | Bin 75776 -> 0 bytes
.../test/resources/test-documents/protect.xlsx | Bin 12968 -> 0 bytes
.../resources/test-documents/protectedFile.xlsx | Bin 12968 -> 0 bytes
.../test-documents/protectedSheets.xlsx | Bin 11236 -> 0 bytes
.../test/resources/test-documents/quoted.mbox | 4 -
.../test/resources/test-documents/resume.html | 73 -
.../test/resources/test-documents/rsstest.rss | 36 -
.../resources/test-documents/russian.cp866.txt | 6 -
.../test-documents/sampleFile.iso19139 | 453 --
.../test/resources/test-documents/simple.mbox | 7 -
.../sresa1b_ncar_ccsm3_0_run1_200001.nc | Bin 2767916 -> 0 bytes
.../test-documents/tableHeaders.numbers | Bin 89554 -> 0 bytes
.../resources/test-documents/tableNames.numbers | Bin 88246 -> 0 bytes
.../test-documents/test-documents-spanned.z01 | Bin 65536 -> 0 bytes
.../test-documents/test-documents-spanned.zip | Bin 3488 -> 0 bytes
.../resources/test-documents/test-documents.7z | Bin 66817 -> 0 bytes
.../test-documents/test-documents.cpio | Bin 116224 -> 0 bytes
.../resources/test-documents/test-documents.rar | Bin 67945 -> 0 bytes
.../resources/test-documents/test-documents.tar | Bin 133120 -> 0 bytes
.../test-documents/test-documents.tar.Z | Bin 103647 -> 0 bytes
.../test-documents/test-documents.tbz2 | Bin 71127 -> 0 bytes
.../resources/test-documents/test-documents.tgz | Bin 69060 -> 0 bytes
.../resources/test-documents/test-documents.zip | Bin 68403 -> 0 bytes
.../resources/test-documents/test-outlook.msg | Bin 19968 -> 0 bytes
.../test-documents/test-outlook2003.msg | Bin 83968 -> 0 bytes
.../test-documents/test-zip-of-zip.zip | Bin 299 -> 0 bytes
.../src/test/resources/test-documents/test.doc | Bin 9216 -> 0 bytes
.../src/test/resources/test-documents/test.fb2 | 350 -
.../src/test/resources/test-documents/test.hdf | Bin 542529 -> 0 bytes
.../src/test/resources/test-documents/test.he5 | Bin 1396916 -> 0 bytes
.../src/test/resources/test-documents/test1.swf | Bin 21054 -> 0 bytes
.../src/test/resources/test-documents/test2.swf | Bin 42534 -> 0 bytes
.../src/test/resources/test-documents/test3.swf | Bin 51562 -> 0 bytes
.../test-documents/test7Z_protected_passTika.7z | Bin 260 -> 0 bytes
.../resources/test-documents/testACCESS.mdb | Bin 110592 -> 0 bytes
.../test/resources/test-documents/testAFM.afm | 50 -
.../test/resources/test-documents/testAIFF.aif | Bin 3894 -> 0 bytes
.../resources/test-documents/testAMR-WB.amr | Bin 3609 -> 0 bytes
.../test/resources/test-documents/testAMR.amr | Bin 3620 -> 0 bytes
.../test/resources/test-documents/testAPK.apk | Bin 11740 -> 0 bytes
.../resources/test-documents/testARofSND.ar | Bin 3936 -> 0 bytes
.../resources/test-documents/testARofText.ar | 5 -
.../test/resources/test-documents/testASF.asf | Bin 62439 -> 0 bytes
.../resources/test-documents/testASiCE.asice | Bin 2916 -> 0 bytes
.../resources/test-documents/testASiCS.asics | Bin 2705 -> 0 bytes
.../test/resources/test-documents/testATOM.atom | 27 -
.../src/test/resources/test-documents/testAU.au | Bin 3868 -> 0 bytes
.../resources/test-documents/testAccess2.accdb | Bin 794624 -> 0 bytes
.../test-documents/testAccess2_2000.mdb | Bin 421888 -> 0 bytes
.../test-documents/testAccess2_2002-2003.mdb | Bin 417792 -> 0 bytes
.../test-documents/testAccess2_encrypted.accdb | Bin 557056 -> 0 bytes
.../test-documents/testAccess_V1997.mdb | Bin 118784 -> 0 bytes
.../test-documents/testAnnotations.pdf | Bin 18580 -> 0 bytes
.../resources/test-documents/testBDB_btree_2.db | Bin 8192 -> 0 bytes
.../resources/test-documents/testBDB_btree_3.db | Bin 8192 -> 0 bytes
.../resources/test-documents/testBDB_btree_4.db | Bin 8192 -> 0 bytes
.../resources/test-documents/testBDB_btree_5.db | Bin 8192 -> 0 bytes
.../resources/test-documents/testBDB_hash_2.db | Bin 12288 -> 0 bytes
.../resources/test-documents/testBDB_hash_3.db | Bin 12288 -> 0 bytes
.../resources/test-documents/testBDB_hash_4.db | Bin 12288 -> 0 bytes
.../resources/test-documents/testBDB_hash_5.db | Bin 12288 -> 0 bytes
.../resources/test-documents/testBIBTEX.bib | 21 -
.../test/resources/test-documents/testBMP.bmp | Bin 22554 -> 0 bytes
.../test/resources/test-documents/testBMPfp.txt | 3 -
.../test/resources/test-documents/testBPG.bpg | Bin 1824 -> 0 bytes
.../resources/test-documents/testBPG_GEO.bpg | Bin 2042 -> 0 bytes
.../test-documents/testBPG_commented.bpg | Bin 10281 -> 0 bytes
.../testBPG_commented_xnviewmp026.bpg | Bin 12374 -> 0 bytes
.../test-documents/testBinControlWord.rtf | 2 -
.../test-documents/testBulletPoints.key | Bin 213830 -> 0 bytes
.../src/test/resources/test-documents/testC.c | 6 -
.../resources/test-documents/testCADKEY.prt | Bin 10246 -> 0 bytes
.../resources/test-documents/testCADKEY2.prt | Bin 41664 -> 0 bytes
.../test/resources/test-documents/testCOREL.shw | Bin 77824 -> 0 bytes
.../test/resources/test-documents/testCPP.cpp | 14 -
.../test/resources/test-documents/testCSS.css | 48 -
.../test/resources/test-documents/testCSV.csv | 23 -
.../test/resources/test-documents/testChm.chm | Bin 186259 -> 0 bytes
.../test/resources/test-documents/testChm2.chm | Bin 10807437 -> 0 bytes
.../test/resources/test-documents/testChm3.chm | Bin 900481 -> 0 bytes
.../resources/test-documents/testComment.doc | Bin 22528 -> 0 bytes
.../resources/test-documents/testComment.docx | Bin 11019 -> 0 bytes
.../resources/test-documents/testComment.pdf | Bin 68398 -> 0 bytes
.../resources/test-documents/testComment.ppt | Bin 101376 -> 0 bytes
.../resources/test-documents/testComment.pptx | Bin 34979 -> 0 bytes
.../resources/test-documents/testComment.rtf | 169 -
.../resources/test-documents/testComment.xls | Bin 23040 -> 0 bytes
.../resources/test-documents/testComment.xlsx | Bin 9692 -> 0 bytes
.../test-documents/testControlCharacters.doc | Bin 448000 -> 0 bytes
.../test/resources/test-documents/testDITA.dita | 34 -
.../resources/test-documents/testDITA.ditamap | 23 -
.../resources/test-documents/testDITA2.dita | 33 -
.../test-documents/testDOCX_Thumbnail.docx | Bin 13810 -> 0 bytes
.../test/resources/test-documents/testDOTM.dotm | Bin 65527 -> 0 bytes
.../resources/test-documents/testDWG2000.dwg | Bin 675048 -> 0 bytes
.../resources/test-documents/testDWG2004.dwg | Bin 39335 -> 0 bytes
.../test-documents/testDWG2004_no_header.dwg | Bin 38178 -> 0 bytes
.../resources/test-documents/testDWG2007.dwg | Bin 73088 -> 0 bytes
.../resources/test-documents/testDWG2010.dwg | Bin 59562 -> 0 bytes
.../test-documents/testDWG2010_custom_props.dwg | Bin 73791 -> 0 bytes
.../test-documents/testDWGmech2004.dwg | Bin 439438 -> 0 bytes
.../test-documents/testDWGmech2004DX.dwg | Bin 439563 -> 0 bytes
.../test-documents/testDWGmech2005.dwg | Bin 439627 -> 0 bytes
.../test-documents/testDWGmech2006.dwg | Bin 439982 -> 0 bytes
.../test-documents/testDWGmech2007.dwg | Bin 479904 -> 0 bytes
.../test-documents/testDWGmech2008.dwg | Bin 487456 -> 0 bytes
.../test-documents/testDWGmech2009.dwg | Bin 483840 -> 0 bytes
.../test-documents/testDWGmech2010.dwg | Bin 467023 -> 0 bytes
.../test-documents/testDWGmech2011.dwg | Bin 466891 -> 0 bytes
.../resources/test-documents/testDWGmech6.dwg | Bin 687882 -> 0 bytes
.../resources/test-documents/testDetached.p7s | Bin 2941 -> 0 bytes
.../test-documents/testDocumentLink.doc | Bin 812032 -> 0 bytes
.../test/resources/test-documents/testEAR.ear | Bin 1086 -> 0 bytes
.../test/resources/test-documents/testEMF.emf | Bin 60400 -> 0 bytes
.../test/resources/test-documents/testEMLX.emlx | 55 -
.../test/resources/test-documents/testEPUB.epub | Bin 29719 -> 0 bytes
.../test-documents/testEXCEL-charts.xls | Bin 15360 -> 0 bytes
.../test-documents/testEXCEL-formats.xls | Bin 13824 -> 0 bytes
.../test-documents/testEXCEL-formats.xlsx | Bin 8303 -> 0 bytes
.../test-documents/testEXCEL.strict.xlsx | Bin 10006 -> 0 bytes
.../test/resources/test-documents/testEXCEL.xls | Bin 13824 -> 0 bytes
.../resources/test-documents/testEXCEL.xlsb | Bin 9161 -> 0 bytes
.../resources/test-documents/testEXCEL.xlsx | Bin 9453 -> 0 bytes
.../resources/test-documents/testEXCEL_1img.xls | Bin 20992 -> 0 bytes
.../test-documents/testEXCEL_1img.xlsx | Bin 14552 -> 0 bytes
.../resources/test-documents/testEXCEL_4.xls | Bin 39942 -> 0 bytes
.../resources/test-documents/testEXCEL_5.xls | Bin 7168 -> 0 bytes
.../resources/test-documents/testEXCEL_95.xls | Bin 20992 -> 0 bytes
.../test-documents/testEXCEL_custom_props.xls | Bin 17408 -> 0 bytes
.../test-documents/testEXCEL_custom_props.xlsx | Bin 9230 -> 0 bytes
.../test-documents/testEXCEL_embeded.xls | Bin 303104 -> 0 bytes
.../test-documents/testEXCEL_embeded.xlsx | Bin 348405 -> 0 bytes
.../testEXCEL_headers_footers.xls | Bin 33792 -> 0 bytes
.../testEXCEL_headers_footers.xlsx | Bin 11740 -> 0 bytes
.../testEXCEL_protected_passtika.xls | Bin 17408 -> 0 bytes
.../testEXCEL_protected_passtika.xlsx | Bin 12800 -> 0 bytes
.../test-documents/testEXCEL_textbox.xlsx | Bin 11017 -> 0 bytes
.../resources/test-documents/testEmbedded.zip | Bin 340 -> 0 bytes
.../resources/test-documents/testException1.doc | Bin 49152 -> 0 bytes
.../resources/test-documents/testException2.doc | Bin 58368 -> 0 bytes
.../test-documents/testExtraSpaces.pdf | Bin 20743 -> 0 bytes
.../test/resources/test-documents/testFITS.fits | 5 -
.../test/resources/test-documents/testFLAC.flac | Bin 10604 -> 0 bytes
.../test/resources/test-documents/testFLAC.oga | Bin 10820 -> 0 bytes
.../test/resources/test-documents/testFLV.flv | Bin 90580 -> 0 bytes
.../resources/test-documents/testFOXMAIL.box | 1327 ----
.../testFontAfterBufferedText.rtf | 7 -
.../resources/test-documents/testFooter.ods | Bin 7207 -> 0 bytes
.../resources/test-documents/testFooter.odt | Bin 8381 -> 0 bytes
.../resources/test-documents/testFreeBSD-x86-64 | Bin 6369 -> 0 bytes
.../test/resources/test-documents/testGIF.gif | Bin 8495 -> 0 bytes
.../resources/test-documents/testGROOVY.groovy | 9 -
.../test-documents/testGroupWiseEml.eml | 58 -
.../src/test/resources/test-documents/testH.h | 5 -
.../test/resources/test-documents/testHTML.html | 28 -
.../testHTMLNoisyMetaEncoding_1.html | 77 -
.../testHTMLNoisyMetaEncoding_2.html | 77 -
.../testHTMLNoisyMetaEncoding_3.html | 77 -
.../testHTMLNoisyMetaEncoding_4.html | 77 -
.../resources/test-documents/testHTML_utf8.html | 25 -
.../resources/test-documents/testHWP_3.0.hwp | Bin 9287 -> 0 bytes
.../resources/test-documents/testHWP_5.0.hwp | Bin 16384 -> 0 bytes
.../test/resources/test-documents/testINDD.indd | Bin 880640 -> 0 bytes
.../test/resources/test-documents/testIPA.ipa | Bin 163803 -> 0 bytes
...-2_metabolite profiling_NMR spectroscopy.txt | 51 -
.../testISATab_BII-I-1/a_metabolome.txt | 112 -
.../testISATab_BII-I-1/a_microarray.txt | 15 -
.../testISATab_BII-I-1/a_proteome.txt | 19 -
.../testISATab_BII-I-1/a_transcriptome.txt | 49 -
.../testISATab_BII-I-1/i_investigation.txt | 164 -
.../testISATab_BII-I-1/s_BII-S-1.txt | 165 -
.../testISATab_BII-I-1/s_BII-S-2.txt | 15 -
.../test/resources/test-documents/testJAR.jar | Bin 441 -> 0 bytes
.../test-documents/testJAR_with_HTML.jar | Bin 5594 -> 0 bytes
.../test-documents/testJAR_with_PEHDR.jar | Bin 35343 -> 0 bytes
.../test/resources/test-documents/testJAVA.java | 14 -
.../test-documents/testJAVAPROPS.properties | 22 -
.../resources/test-documents/testJNILIB.jnilib | Bin 35332 -> 0 bytes
.../test/resources/test-documents/testJPEG.jp2 | Bin 25725 -> 0 bytes
.../test/resources/test-documents/testJPEG.jpg | Bin 7686 -> 0 bytes
.../resources/test-documents/testJPEG_EXIF.jpg | Bin 16357 -> 0 bytes
.../testJPEG_EXIF_emptyDateTime.jpg | Bin 24597 -> 0 bytes
.../resources/test-documents/testJPEG_GEO.jpg | Bin 16482 -> 0 bytes
.../resources/test-documents/testJPEG_GEO_2.jpg | Bin 20844 -> 0 bytes
.../test-documents/testJPEG_commented.jpg | Bin 13325 -> 0 bytes
.../testJPEG_commented_pspcs2mac.jpg | Bin 26173 -> 0 bytes
.../testJPEG_commented_xnviewmp026.jpg | Bin 13910 -> 0 bytes
.../test-documents/testJPEG_oddTagComponent.jpg | Bin 8330 -> 0 bytes
.../src/test/resources/test-documents/testJS.js | 53 -
.../test-documents/testJournalParser.pdf | Bin 985125 -> 0 bytes
.../test/resources/test-documents/testKML.kml | 917 ---
.../test/resources/test-documents/testKMZ.kmz | Bin 8106 -> 0 bytes
.../resources/test-documents/testKeynote.key | Bin 221745 -> 0 bytes
.../resources/test-documents/testLinux-arm-32le | Bin 5517 -> 0 bytes
.../test-documents/testLinux-mips-32be | Bin 8125 -> 0 bytes
.../test-documents/testLinux-mips-32le | Bin 38051 -> 0 bytes
.../resources/test-documents/testLinux-ppc-32be | Bin 248480 -> 0 bytes
.../resources/test-documents/testLinux-x86-32 | Bin 7175 -> 0 bytes
.../resources/test-documents/testLinux-x86-64 | Bin 8377 -> 0 bytes
.../resources/test-documents/testLotusEml.eml | 71 -
.../test/resources/test-documents/testMATLAB.m | 4 -
.../test-documents/testMATLAB_barcast.m | 383 -
.../test-documents/testMATLAB_wtsgaus.m | 52 -
.../test-documents/testMHTMLFirefox.mhtml | 455 --
.../test/resources/test-documents/testMID.mid | Bin 322 -> 0 bytes
.../test/resources/test-documents/testMKV.mkv | Bin 82969 -> 0 bytes
.../resources/test-documents/testMP3i18n.mp3 | Bin 40832 -> 0 bytes
.../resources/test-documents/testMP3id3v1.mp3 | Bin 39416 -> 0 bytes
.../test-documents/testMP3id3v1_v2.mp3 | Bin 40960 -> 0 bytes
.../resources/test-documents/testMP3id3v2.mp3 | Bin 39577 -> 0 bytes
.../resources/test-documents/testMP3id3v24.mp3 | Bin 39471 -> 0 bytes
.../resources/test-documents/testMP3lyrics.mp3 | Bin 34688 -> 0 bytes
.../resources/test-documents/testMP3noid3.mp3 | Bin 39288 -> 0 bytes
.../test-documents/testMP3truncated.mp3 | Bin 65536 -> 0 bytes
.../test/resources/test-documents/testMP4.m4a | Bin 4770 -> 0 bytes
.../test/resources/test-documents/testMSG.msg | Bin 20480 -> 0 bytes
.../test-documents/testMSG_att_doc.msg | Bin 52224 -> 0 bytes
.../test-documents/testMSG_att_msg.msg | Bin 71680 -> 0 bytes
.../test-documents/testMSG_chinese.msg | Bin 48129 -> 0 bytes
.../test-documents/testMSG_forwarded.msg | Bin 25600 -> 0 bytes
.../test/resources/test-documents/testMYSQL.MYD | Bin 24 -> 0 bytes
.../test/resources/test-documents/testMYSQL.MYI | Bin 1024 -> 0 bytes
.../test/resources/test-documents/testMYSQL.frm | Bin 8594 -> 0 bytes
.../test-documents/testMasterFooter.odp | Bin 13975 -> 0 bytes
.../test-documents/testMasterSlideTable.key | Bin 220184 -> 0 bytes
.../test-documents/testNPEOpenDocument.odt | Bin 18304 -> 0 bytes
.../test-documents/testNakedUTF16BOM.mp3 | Bin 2625 -> 0 bytes
.../test-documents/testNumbers.numbers | Bin 134571 -> 0 bytes
.../test-documents/testNumbersCharts.numbers | Bin 104547 -> 0 bytes
.../test/resources/test-documents/testOCR.docx | Bin 62041 -> 0 bytes
.../test/resources/test-documents/testOCR.jpg | Bin 3408 -> 0 bytes
.../test/resources/test-documents/testOCR.pdf | Bin 41936 -> 0 bytes
.../test/resources/test-documents/testOCR.pptx | Bin 78550 -> 0 bytes
.../test-documents/testOCTET_header.dbase3 | Bin 194 -> 0 bytes
.../test-documents/testODFwithOOo3.odt | Bin 24286 -> 0 bytes
.../test-documents/testODT-TIKA-6000.odt | Bin 3888830 -> 0 bytes
.../test/resources/test-documents/testOPUS.opus | Bin 1059 -> 0 bytes
.../test-documents/testOpenOffice2.odf | Bin 10977 -> 0 bytes
.../test-documents/testOpenOffice2.odt | Bin 26448 -> 0 bytes
.../test-documents/testOptionalHyphen.doc | Bin 22016 -> 0 bytes
.../test-documents/testOptionalHyphen.docx | Bin 10382 -> 0 bytes
.../test-documents/testOptionalHyphen.pdf | Bin 44954 -> 0 bytes
.../test-documents/testOptionalHyphen.ppt | Bin 100864 -> 0 bytes
.../test-documents/testOptionalHyphen.pptx | Bin 33173 -> 0 bytes
.../test-documents/testOptionalHyphen.rtf | 158 -
.../test-documents/testOverlappingText.pdf | Bin 899 -> 0 bytes
.../test/resources/test-documents/testPBM.pbm | 3 -
.../test-documents/testPDF-custommetadata.pdf | Bin 7495 -> 0 bytes
.../test/resources/test-documents/testPDF.pdf | Bin 34824 -> 0 bytes
.../testPDFEmbeddingAndEmbedded.docx | Bin 34139 -> 0 bytes
.../testPDFFileEmbInAnnotation.pdf | Bin 97211 -> 0 bytes
.../resources/test-documents/testPDFPackage.pdf | Bin 92359 -> 0 bytes
.../test-documents/testPDFTripleLangTitle.pdf | Bin 1719 -> 0 bytes
.../test-documents/testPDFTwoTextBoxes.pdf | Bin 57100 -> 0 bytes
.../resources/test-documents/testPDFVarious.pdf | Bin 205491 -> 0 bytes
.../testPDF_PDFEncodedStringInXMP.pdf | Bin 7210 -> 0 bytes
.../test-documents/testPDF_Version.10.x.pdf | Bin 5811 -> 0 bytes
.../testPDF_Version.11.x.PDFA-1b.pdf | Bin 23081 -> 0 bytes
.../test-documents/testPDF_Version.4.x.pdf | Bin 10007 -> 0 bytes
.../test-documents/testPDF_Version.5.x.pdf | Bin 5953 -> 0 bytes
.../test-documents/testPDF_Version.6.x.pdf | Bin 5903 -> 0 bytes
.../test-documents/testPDF_Version.7.x.pdf | Bin 5903 -> 0 bytes
.../test-documents/testPDF_Version.8.x.pdf | Bin 5903 -> 0 bytes
.../test-documents/testPDF_Version.9.x.pdf | Bin 5998 -> 0 bytes
.../test-documents/testPDF_acroform3.pdf | Bin 26746 -> 0 bytes
.../resources/test-documents/testPDF_bom.pdf | Bin 7645 -> 0 bytes
.../test-documents/testPDF_bookmarks.pdf | Bin 9487 -> 0 bytes
.../test-documents/testPDF_childAttachments.pdf | Bin 2318262 -> 0 bytes
.../testPDF_multiFormatEmbFiles.pdf | Bin 2662 -> 0 bytes
..._no_extract_no_accessibility_owner_empty.pdf | 87 -
...F_no_extract_no_accessibility_owner_user.pdf | 87 -
...no_extract_yes_accessibility_owner_empty.pdf | 87 -
..._no_extract_yes_accessibility_owner_user.pdf | 87 -
.../test-documents/testPDF_protected.pdf | Bin 506064 -> 0 bytes
.../test-documents/testPDF_twoAuthors.pdf | Bin 12628 -> 0 bytes
.../test/resources/test-documents/testPGM.pgm | 4 -
.../test/resources/test-documents/testPICT.pct | Bin 23454 -> 0 bytes
.../test/resources/test-documents/testPNG.png | Bin 17041 -> 0 bytes
.../test/resources/test-documents/testPPM.ppm | 4 -
.../test/resources/test-documents/testPPT.potm | Bin 40102 -> 0 bytes
.../test/resources/test-documents/testPPT.ppsm | Bin 36545 -> 0 bytes
.../test/resources/test-documents/testPPT.ppsx | Bin 36521 -> 0 bytes
.../test/resources/test-documents/testPPT.ppt | Bin 16384 -> 0 bytes
.../test/resources/test-documents/testPPT.pptm | Bin 36541 -> 0 bytes
.../test/resources/test-documents/testPPT.pptx | Bin 36518 -> 0 bytes
.../test/resources/test-documents/testPPT.thmx | Bin 42485 -> 0 bytes
.../test/resources/test-documents/testPPT.xps | Bin 75442 -> 0 bytes
.../test-documents/testPPTX_Thumbnail.pptx | Bin 42580 -> 0 bytes
.../resources/test-documents/testPPT_2imgs.ppt | Bin 124928 -> 0 bytes
.../resources/test-documents/testPPT_2imgs.pptx | Bin 59246 -> 0 bytes
.../test-documents/testPPT_autodate.ppt | Bin 148992 -> 0 bytes
.../test-documents/testPPT_autodate.pptx | Bin 47707 -> 0 bytes
.../test-documents/testPPT_comment.ppt | Bin 86016 -> 0 bytes
.../test-documents/testPPT_comment.pptx | Bin 30939 -> 0 bytes
.../test-documents/testPPT_custom_props.ppt | Bin 104960 -> 0 bytes
.../test-documents/testPPT_custom_props.pptx | Bin 37864 -> 0 bytes
.../test-documents/testPPT_embedded2.ppt | Bin 92160 -> 0 bytes
.../testPPT_embedded_two_slides.pptx | Bin 255364 -> 0 bytes
.../test-documents/testPPT_embeded.ppt | Bin 224768 -> 0 bytes
.../test-documents/testPPT_embeded.pptx | Bin 202969 -> 0 bytes
.../test-documents/testPPT_masterFooter.ppt | Bin 139776 -> 0 bytes
.../test-documents/testPPT_masterFooter.pptx | Bin 35128 -> 0 bytes
.../test-documents/testPPT_masterText.ppt | Bin 117760 -> 0 bytes
.../test-documents/testPPT_masterText.pptx | Bin 32270 -> 0 bytes
.../test-documents/testPPT_masterText2.ppt | Bin 102912 -> 0 bytes
.../test-documents/testPPT_masterText2.pptx | Bin 32291 -> 0 bytes
.../testPPT_protected_passtika.ppt | Bin 43008 -> 0 bytes
.../testPPT_protected_passtika.pptx | Bin 41472 -> 0 bytes
.../test-documents/testPPT_various.ppt | Bin 160768 -> 0 bytes
.../test-documents/testPPT_various.pptx | Bin 56659 -> 0 bytes
.../test-documents/testPROJECT2003.mpp | Bin 125440 -> 0 bytes
.../test-documents/testPROJECT2007.mpp | Bin 147968 -> 0 bytes
.../test/resources/test-documents/testPSD.psd | Bin 69410 -> 0 bytes
.../test/resources/test-documents/testPSD2.psd | Bin 31315 -> 0 bytes
.../test/resources/test-documents/testPST.pst | Bin 271360 -> 0 bytes
.../resources/test-documents/testPUBLISHER.pub | Bin 65536 -> 0 bytes
.../resources/test-documents/testPageNumber.pdf | Bin 52020 -> 0 bytes
.../resources/test-documents/testPages.pages | Bin 134152 -> 0 bytes
.../test-documents/testPagesComments.pages | Bin 154546 -> 0 bytes
.../testPagesHeadersFootersAlphaLower.pages | Bin 168501 -> 0 bytes
.../testPagesHeadersFootersAlphaUpper.pages | Bin 168995 -> 0 bytes
.../testPagesHeadersFootersFootnotes.pages | Bin 177328 -> 0 bytes
.../testPagesHeadersFootersRomanLower.pages | Bin 103923 -> 0 bytes
.../testPagesHeadersFootersRomanUpper.pages | Bin 174197 -> 0 bytes
.../test-documents/testPagesLayout.pages | Bin 66480 -> 0 bytes
.../test-documents/testPagesPwdProtected.pages | Bin 33166 -> 0 bytes
.../test-documents/testPhoneNumberExtractor.odt | Bin 15244 -> 0 bytes
.../test-documents/testPopupAnnotation.pdf | Bin 9081 -> 0 bytes
.../resources/test-documents/testQUATTRO.qpw | Bin 4608 -> 0 bytes
.../resources/test-documents/testQUATTRO.wb3 | Bin 5120 -> 0 bytes
.../test/resources/test-documents/testRDF.rdf | 23 -
.../test/resources/test-documents/testRFC822 | 41 -
.../resources/test-documents/testRFC822-CC-BCC | 44 -
.../resources/test-documents/testRFC822-big | 199 -
.../test-documents/testRFC822-limitedheaders | 9 -
.../test-documents/testRFC822-multipart | 111 -
.../resources/test-documents/testRFC822_base64 | 8 -
.../test-documents/testRFC822_encrypted_zip | 61 -
.../test-documents/testRFC822_i18nheaders | 9 -
.../test-documents/testRFC822_normal_zip | 61 -
.../resources/test-documents/testRFC822_oddfrom | 2105 ------
.../resources/test-documents/testRFC822_quoted | 13 -
.../resources/test-documents/testRTF-ms932.rtf | 30 -
.../test/resources/test-documents/testRTF.rtf | 17 -
.../test-documents/testRTFBoldItalic.rtf | 164 -
.../test-documents/testRTFControls.rtf | 165 -
.../testRTFCorruptListOverride.rtf | 95 -
.../test-documents/testRTFEmbeddedFiles.rtf | 6856 ------------------
.../test-documents/testRTFEmbeddedLink.rtf | 1438 ----
.../testRTFHexEscapeInsideWord.rtf | 4 -
.../test-documents/testRTFHyperlink.rtf | 598 --
.../testRTFIgnoredControlWord.rtf | 17 -
.../test-documents/testRTFInvalidUnicode.rtf | 11 -
.../test-documents/testRTFJapanese.rtf | 87 -
.../test-documents/testRTFListLibreOffice.rtf | 67 -
.../test-documents/testRTFListMicrosoftWord.rtf | 227 -
.../test-documents/testRTFListOverride.rtf | 424 --
.../test-documents/testRTFNewlines.rtf | 27 -
.../test-documents/testRTFRegularImages.rtf | 1241 ----
.../testRTFTableCellSeparation.rtf | 7 -
.../testRTFTableCellSeparation2.rtf | 3 -
.../test-documents/testRTFUmlautSpaces.rtf | 3 -
.../test-documents/testRTFUmlautSpaces2.rtf | 8 -
.../test-documents/testRTFUnicodeGothic.rtf | 5 -
...TFUnicodeUCNControlWordCharacterDoubling.rtf | 8 -
.../resources/test-documents/testRTFVarious.rtf | 329 -
.../testRTFWindowsCodepage1250.rtf | 5 -
.../test-documents/testRTFWithCurlyBraces.rtf | 44 -
.../testRTFWord2010CzechCharacters.rtf | 190 -
.../testRTFWordPadCzechCharacters.rtf | 5 -
.../resources/test-documents/testSQLITE3.db | Bin 2048 -> 0 bytes
.../test/resources/test-documents/testSVG.svg | 7 -
.../test/resources/test-documents/testSVG.svgz | Bin 222 -> 0 bytes
.../resources/test-documents/testSolaris-x86-32 | Bin 6404 -> 0 bytes
.../resources/test-documents/testSqlite3b.db | Bin 27648 -> 0 bytes
.../test-documents/testStarOffice-5.2-calc.sdc | Bin 17408 -> 0 bytes
.../test-documents/testStarOffice-5.2-draw.sda | Bin 29184 -> 0 bytes
.../testStarOffice-5.2-impress.sdd | Bin 29184 -> 0 bytes
.../testStarOffice-5.2-writer.sdw | Bin 8192 -> 0 bytes
.../resources/test-documents/testStyles.odt | Bin 11663 -> 0 bytes
.../test/resources/test-documents/testTIFF.tif | Bin 25584 -> 0 bytes
.../resources/test-documents/testTXT-tika.axx | Bin 334 -> 0 bytes
.../test/resources/test-documents/testTXT.txt | 2 -
.../test/resources/test-documents/testTXT.zlib | Bin 55 -> 0 bytes
.../test/resources/test-documents/testTXT.zlib0 | Bin 58 -> 0 bytes
.../test/resources/test-documents/testTXT.zlib5 | Bin 55 -> 0 bytes
.../test/resources/test-documents/testTXT.zlib9 | Bin 55 -> 0 bytes
.../test-documents/testTXTNonASCIIUTF8.txt | 7 -
.../resources/test-documents/testTables.key | Bin 216497 -> 0 bytes
.../resources/test-documents/testTextBoxes.key | Bin 208981 -> 0 bytes
.../test-documents/testThunderbirdEml.eml | 32 -
.../resources/test-documents/testTinyPE.exe | Bin 1024 -> 0 bytes
.../resources/test-documents/testTrueType3.ttf | Bin 224592 -> 0 bytes
.../test-documents/testUserDefinedCharset.mhtml | 21 -
.../test/resources/test-documents/testVISIO.vsd | Bin 45568 -> 0 bytes
.../resources/test-documents/testVISIO.vsdm | Bin 32360 -> 0 bytes
.../resources/test-documents/testVISIO.vsdx | Bin 32350 -> 0 bytes
.../resources/test-documents/testVISIO.vssm | Bin 32358 -> 0 bytes
.../resources/test-documents/testVISIO.vssx | Bin 32349 -> 0 bytes
.../resources/test-documents/testVISIO.vstm | Bin 32361 -> 0 bytes
.../resources/test-documents/testVISIO.vstx | Bin 32350 -> 0 bytes
.../resources/test-documents/testVORBIS.ogg | Bin 4241 -> 0 bytes
.../test-documents/testVORCalcTemplate.vor | Bin 17408 -> 0 bytes
.../test-documents/testVORDrawTemplate.vor | Bin 29696 -> 0 bytes
.../test-documents/testVORImpressTemplate.vor | Bin 30208 -> 0 bytes
.../test-documents/testVORWriterTemplate.vor | Bin 8192 -> 0 bytes
.../test/resources/test-documents/testWAR.war | Bin 1003 -> 0 bytes
.../test/resources/test-documents/testWAV.wav | Bin 3884 -> 0 bytes
.../test-documents/testWEBARCHIVE.webarchive | 646 --
.../test/resources/test-documents/testWEBM.webm | Bin 39745 -> 0 bytes
.../test/resources/test-documents/testWEBP.webp | Bin 3442 -> 0 bytes
.../resources/test-documents/testWINMAIL.dat | Bin 66276 -> 0 bytes
.../test/resources/test-documents/testWMA.wma | Bin 27747 -> 0 bytes
.../test/resources/test-documents/testWMF.wmf | Bin 51590 -> 0 bytes
.../test/resources/test-documents/testWMV.wmv | Bin 113878 -> 0 bytes
.../test/resources/test-documents/testWORD.doc | Bin 32768 -> 0 bytes
.../test/resources/test-documents/testWORD.docx | Bin 13436 -> 0 bytes
.../test/resources/test-documents/testWORD6.doc | Bin 6656 -> 0 bytes
.../resources/test-documents/testWORD_1img.doc | Bin 14848 -> 0 bytes
.../resources/test-documents/testWORD_1img.docx | Bin 8325 -> 0 bytes
.../resources/test-documents/testWORD_3imgs.doc | Bin 36352 -> 0 bytes
.../test-documents/testWORD_3imgs.docx | Bin 31303 -> 0 bytes
.../testWORD_bold_character_runs.doc | Bin 22016 -> 0 bytes
.../testWORD_bold_character_runs.docx | Bin 12912 -> 0 bytes
.../testWORD_bold_character_runs2.doc | Bin 22016 -> 0 bytes
.../testWORD_bold_character_runs2.docx | Bin 12863 -> 0 bytes
.../testWORD_closingSmartQInHyperLink.doc | Bin 26624 -> 0 bytes
.../test-documents/testWORD_custom_props.doc | Bin 22528 -> 0 bytes
.../test-documents/testWORD_custom_props.docx | Bin 13942 -> 0 bytes
.../test-documents/testWORD_embedded_pdf.doc | Bin 1491456 -> 0 bytes
.../test-documents/testWORD_embedded_pdf.docx | Bin 63294 -> 0 bytes
.../test-documents/testWORD_embedded_rtf.doc | Bin 16384 -> 0 bytes
.../test-documents/testWORD_embeded.doc | Bin 319488 -> 0 bytes
.../test-documents/testWORD_embeded.docx | Bin 157830 -> 0 bytes
.../testWORD_header_hyperlink.doc | Bin 22528 -> 0 bytes
.../testWORD_missing_ooxml_bean1.docx | Bin 17913 -> 0 bytes
.../test-documents/testWORD_missing_text.docx | Bin 31592 -> 0 bytes
.../test-documents/testWORD_multi_authors.doc | Bin 22528 -> 0 bytes
.../test-documents/testWORD_multi_authors.docx | Bin 12054 -> 0 bytes
.../test-documents/testWORD_no_format.doc | Bin 74752 -> 0 bytes
.../test-documents/testWORD_no_format.docx | Bin 37018 -> 0 bytes
.../test-documents/testWORD_null_style.docx | Bin 29018 -> 0 bytes
.../test-documents/testWORD_numbered_list.doc | Bin 44032 -> 0 bytes
.../test-documents/testWORD_numbered_list.docx | Bin 24696 -> 0 bytes
.../testWORD_override_list_numbering.doc | Bin 56320 -> 0 bytes
.../testWORD_override_list_numbering.docx | Bin 15746 -> 0 bytes
.../testWORD_protected_passtika.doc | Bin 22016 -> 0 bytes
.../testWORD_protected_passtika.docx | Bin 14336 -> 0 bytes
.../test-documents/testWORD_tabular_symbol.doc | Bin 10240 -> 0 bytes
.../test-documents/testWORD_text_box.docx | Bin 25271 -> 0 bytes
.../test-documents/testWORD_various.doc | Bin 35328 -> 0 bytes
.../test-documents/testWORD_various.docx | Bin 19169 -> 0 bytes
.../test/resources/test-documents/testWORKS.wps | Bin 9728 -> 0 bytes
.../resources/test-documents/testWORKS2000.wps | Bin 5120 -> 0 bytes
.../test-documents/testWORKSSpreadsheet7.0.xlr | Bin 10752 -> 0 bytes
.../testWORKSWordProcessor3.0.wps | Bin 3072 -> 0 bytes
.../testWORKSWordProcessor4.0.wps | Bin 3584 -> 0 bytes
.../resources/test-documents/testWebVTT.vtt | 33 -
.../test-documents/testWebp_Alpha_Lossless.webp | Bin 92312 -> 0 bytes
.../test-documents/testWebp_Alpha_Lossy.webp | Bin 23404 -> 0 bytes
.../test-documents/testWindows-x86-32.exe | Bin 11723 -> 0 bytes
.../resources/test-documents/testWordArt.pptx | Bin 37792 -> 0 bytes
.../resources/test-documents/testXHTML.html | 29 -
.../test-documents/testXLSX_Thumbnail.xlsx | Bin 10318 -> 0 bytes
.../test/resources/test-documents/testXML.xml | 48 -
.../test/resources/test-documents/testXML2.xml | 1 -
.../test/resources/test-documents/testXML3.xml | 23 -
.../resources/test-documents/test_TIKA-1251.doc | Bin 50688 -> 0 bytes
.../test-documents/test_embedded_package.rtf | 71 -
.../test-documents/test_embedded_zip.pptx | Bin 345027 -> 0 bytes
.../test-documents/test_list_override.rtf | 21 -
.../resources/test-documents/test_mat_text.mat | Bin 183 -> 0 bytes
.../test-documents/test_recursive_embedded.docx | Bin 27082 -> 0 bytes
.../test_recursive_embedded_npe.docx | Bin 27817 -> 0 bytes
.../resources/test-documents/testiBooks.ibooks | Bin 970636 -> 0 bytes
.../testsolidworksAssembly2013SP2.SLDASM | Bin 209408 -> 0 bytes
.../testsolidworksAssembly2014SP0.SLDASM | Bin 238080 -> 0 bytes
.../testsolidworksDrawing2013SP2.SLDDRW | Bin 180224 -> 0 bytes
.../testsolidworksDrawing2014SP0.SLDDRW | Bin 201216 -> 0 bytes
.../testsolidworksPart2013SP2.SLDPRT | Bin 1010176 -> 0 bytes
.../testsolidworksPart2014SP0.SLDPRT | Bin 1043456 -> 0 bytes
.../test/resources/test-documents/tika434.html | 914 ---
.../StringsConfig-full.properties | 18 -
.../StringsConfig-partial.properties | 16 -
.../TesseractOCRConfig-full.properties | 22 -
.../TesseractOCRConfig-partial.properties | 18 -
1102 files changed, 27195 insertions(+), 27182 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index f4024e3..f2651fa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -47,7 +47,6 @@
<modules>
<module>tika-parent</module>
<module>tika-core</module>
- <module>tika-test-resources</module>
<module>tika-parsers</module>
<module>tika-xmp</module>
<module>tika-serialization</module>
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 0e768fa..e83bde5 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -61,15 +61,12 @@
<dependencies>
<!-- Test dependencies -->
<dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
- <artifactId>tika-test-resources</artifactId>
+ <artifactId>tika-core</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
@@ -89,6 +86,7 @@
<build>
<pluginManagement>
<plugins>
+ <!--
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
@@ -115,6 +113,7 @@
</execution>
</executions>
</plugin>
+ -->
</plugins>
</pluginManagement>
</build>
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
new file mode 100644
index 0000000..3b61f20
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
@@ -0,0 +1,93 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file downloads Apache OpenNLP NER models for testing the NamedEntityParser
+ */
+
+import org.apache.commons.io.IOUtils
+
+/**
+ * Copies an input stream to an output stream, printing progress roughly once
+ * per second. Both streams are closed before this method returns, whether the
+ * copy succeeds or fails.
+ * @param inStr source stream
+ * @param outStr target stream
+ * @param totalLength the total length of the content, used to calculate the
+ *        progress percentage; a value <= 0 means the length is unknown
+ */
+def copyWithProgress(InputStream inStr, OutputStream outStr, long totalLength){
+    int PROGRESS_DELAY = 1000 // minimum milliseconds between two progress lines
+    byte[] buffer = new byte[1024 * 4]
+    long count = 0
+    int len
+    long tt = System.currentTimeMillis()
+    try {
+        while ((len = inStr.read(buffer)) > 0) {
+            outStr.write(buffer, 0, len)
+            count += len
+            if (System.currentTimeMillis() - tt > PROGRESS_DELAY) {
+                if (totalLength > 0) {
+                    println "${count * 100.0/totalLength}% : $count bytes of $totalLength"
+                } else {
+                    // No usable total (e.g. the server sent no Content-Length):
+                    // a percentage would be negative or would divide by zero.
+                    println "$count bytes"
+                }
+                tt = System.currentTimeMillis()
+            }
+        }
+        println "Copy complete. "
+    } finally {
+        // Close on every path so a failed copy does not leak either stream.
+        IOUtils.closeQuietly(inStr)
+        IOUtils.closeQuietly(outStr)
+    }
+}
+
+/**
+ * Downloads the content behind a URL into a local file, creating the parent
+ * directories of the target if needed. If the transfer fails after the target
+ * has been opened, the partially written file is deleted so that a later run
+ * does not mistake it for a complete download.
+ * @param urlStr url of file
+ * @param file path to store file
+ */
+def downloadFile(String urlStr, File file) {
+    println "GET : $urlStr -> $file"
+    // Locals are declared explicitly; undeclared names in a script method would
+    // be stored in the script binding and stay visible after the call.
+    def urlConn = new URL(urlStr).openConnection()
+    long contentLength = urlConn.getContentLengthLong()
+
+    // getAbsoluteFile() guarantees a parent even for a bare file name.
+    file.getAbsoluteFile().getParentFile().mkdirs()
+    def inStream = urlConn.getInputStream()
+    try {
+        def outStream = new FileOutputStream(file)
+        boolean complete = false
+        try {
+            copyWithProgress(inStream, outStream, contentLength)
+            complete = true
+        } finally {
+            // Close before deleting: an open file cannot be removed on every platform.
+            IOUtils.closeQuietly(outStream)
+            if (!complete) {
+                file.delete()
+            }
+        }
+    } finally {
+        IOUtils.closeQuietly(inStream)
+    }
+    println "Download Complete.."
+}
+
+
+def urlPrefix = "http://opennlp.sourceforge.net/models-1.5"
+def prefixPath = "src/test/resources/org/apache/tika/parser/ner/opennlp/"
+
+// Detect the proper path for test resources. The default above is relative to
+// the module that holds this script; when the working directory is the parent
+// maven project, the module directory has to be prepended.
+def moduleDir = "tika-parser-modules/tika-parser-advanced-module"
+if (new File(moduleDir).exists()) {
+    // running from the parent project: resources go to the module holding this script
+    prefixPath = moduleDir + "/" + prefixPath
+} else if (new File("tika-test-resources").exists() && new File("tika-app").exists() ) {
+    // older layout: running from parent maven project, resources go to tika-test-resources
+    prefixPath = "tika-test-resources/" + prefixPath
+}
+
+def modelFiles = //filePath : url
+    [ (prefixPath + "ner-person.bin"): (urlPrefix + "/en-ner-person.bin"),
+      (prefixPath + "ner-location.bin"): (urlPrefix + "/en-ner-location.bin"),
+      (prefixPath + "ner-organization.bin"): (urlPrefix + "/en-ner-organization.bin"),
+      (prefixPath + "ner-date.bin"): (urlPrefix + "/en-ner-date.bin")]
+
+// Fetch only the models that are not on disk yet.
+for (def entry : modelFiles) {
+    File file = new File(entry.key)
+    if (!file.exists()) {
+        downloadFile(entry.value, file)
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh
new file mode 100644
index 0000000..c17899e
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Getting OpenNLP NER models"
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin" -O ner-person.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin" -O ner-location.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin" -O ner-organization.bin
+
+# Additional 4
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-date.bin" -O ner-date.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-money.bin" -O ner-money.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-time.bin" -O ner-time.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-percentage.bin" -O ner-percentage.bin
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
new file mode 100644
index 0000000..e6fa39e
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Case-insensitive English week-day names; the trailing "day" is optional.
+# Wednesday uses a non-capturing group so existing group numbers are unchanged.
+WEEK_DAY=(?i)((sun)|(mon)|(tues)|(?:wednes)|(thurs)|(fri)|((sat)(ur)?))(day)?
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
new file mode 100644
index 0000000..267c399
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <parsers>
+ <parser class="org.apache.tika.parser.ner.NamedEntityParser">
+ <mime>text/plain</mime>
+ <mime>text/html</mime>
+ <mime>application/xhtml+xml</mime>
+ </parser>
+ </parsers>
+
+</properties>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt
new file mode 100644
index 0000000..a6c6e98
Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt
new file mode 100644
index 0000000..178fd9b
Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg
new file mode 100644
index 0000000..1b54bbc
Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg differ