You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/03/22 02:19:13 UTC

[01/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Repository: tika
Updated Branches:
  refs/heads/2.x cf9632388 -> e1498edbb


http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/testlargerbuffer.html
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/testlargerbuffer.html b/tika-test-resources/src/test/resources/test-documents/testlargerbuffer.html
new file mode 100644
index 0000000..545addd
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/testlargerbuffer.html
@@ -0,0 +1,827 @@
+<script language="javascript">
+
+function addToList(from,to)
+{
+    if(from.selectedIndex >= 0) {
+        isPresent = false;
+        var options=to.getElementsByTagName("option");
+
+        if(from.item(from.selectedIndex).value == "0") {
+            for(i=to.options.length-1; i>= 0; i--) {
+                to.removeChild(options[i]);
+            }
+        }
+        for (i=0; i< to.options.length; i++)
+        {
+            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
+                isPresent = true;
+        }
+        if(!isPresent) {
+            var oOption = document.createElement("option");;
+            to.appendChild(oOption);
+            oOption.value = from.item(from.selectedIndex).value;
+            oOption.text  = from.item(from.selectedIndex).text;
+        }
+    }
+}
+
+function delFromList(to)
+{
+    if(to.selectedIndex >= 0) {
+      var options=to.getElementsByTagName("option");
+      to.removeChild(options[to.selectedIndex]);
+    }
+}
+
+function fillListToGet(form, to)
+{
+    var options=to.getElementsByTagName("option");
+    for (i=0; i< to.options.length; i++)
+    {
+        form.action += "&"+to.name+"="+options[i].value;
+    }
+}
+
+</script>
+<script language="javascript">
+
+function addToList(from,to)
+{
+    if(from.selectedIndex >= 0) {
+        isPresent = false;
+        var options=to.getElementsByTagName("option");
+
+        if(from.item(from.selectedIndex).value == "0") {
+            for(i=to.options.length-1; i>= 0; i--) {
+                to.removeChild(options[i]);
+            }
+        }
+        for (i=0; i< to.options.length; i++)
+        {
+            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
+                isPresent = true;
+        }
+        if(!isPresent) {
+            var oOption = document.createElement("option");;
+            to.appendChild(oOption);
+            oOption.value = from.item(from.selectedIndex).value;
+            oOption.text  = from.item(from.selectedIndex).text;
+        }
+    }
+}
+
+function delFromList(to)
+{
+    if(to.selectedIndex >= 0) {
+      var options=to.getElementsByTagName("option");
+      to.removeChild(options[to.selectedIndex]);
+    }
+}
+
+function fillListToGet(form, to)
+{
+    var options=to.getElementsByTagName("option");
+    for (i=0; i< to.options.length; i++)
+    {
+        form.action += "&"+to.name+"="+options[i].value;
+    }
+}
+
+function fillOtherGet(form)
+{
+  if (document.all.price_from != "") {
+    form.action += "&price_from="+document.all.price_from.value;
+  }
+  if (document.all.price_to != "") {
+    form.action += "&price_to="+document.all.price_to.value;
+  }
+  if (document.all.square_from != "") {
+    form.action += "&square_from="+document.all.square_from.value;
+  }
+  if (document.all.square_to != "") {
+    form.action += "&square_to="+document.all.square_to.value;
+  }
+  if (document.all.MKAD != "") {
+    form.action += "&MKAD="+document.all.MKAD.value;
+  }
+}
+
+</script>
+<script language="javascript">
+
+function addToList(from,to)
+{
+    if(from.selectedIndex >= 0) {
+        isPresent = false;
+        var options=to.getElementsByTagName("option");
+
+        if(from.item(from.selectedIndex).value == "0") {
+            for(i=to.options.length-1; i>= 0; i--) {
+                to.removeChild(options[i]);
+            }
+        }
+        for (i=0; i< to.options.length; i++)
+        {
+            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
+                isPresent = true;
+        }
+        if(!isPresent) {
+            var oOption = document.createElement("option");
+            to.appendChild(oOption);
+            oOption.value = from.item(from.selectedIndex).value;
+            oOption.text  = from.item(from.selectedIndex).text;
+        }
+    }
+}
+
+function delFromList(to)
+{
+    if(to.selectedIndex >= 0) {
+      var options=to.getElementsByTagName("option");
+      to.removeChild(options[to.selectedIndex]);
+    }
+}
+
+function fillListToGet(form, to)
+{
+    var options=to.getElementsByTagName("option");
+    for (i=0; i< to.options.length; i++)
+    {
+        form.action += "&"+to.name+"="+options[i].value;
+    }
+}
+
+function fillOtherGet(form)
+{
+  if (document.all.price_from != "") {
+    form.action += "&price_from="+document.all.price_from.value;
+  }
+  if (document.all.price_to != "") {
+    form.action += "&price_to="+document.all.price_to.value;
+  }
+  if (document.all.square_from != "") {
+    form.action += "&square_from="+document.all.square_from.value;
+  }
+  if (document.all.square_to != "") {
+    form.action += "&square_to="+document.all.square_to.value;
+  }
+  if (document.all.MKAD != "") {
+    form.action += "&MKAD="+document.all.MKAD.value;
+  }
+}
+
+</script>
+
+<html>
+<head>
+<title>������ �������, ����� ��������,  ������ ������ � ������. ������������ ������������. ������ �������� "���������-������������"
+
+</title>
+<link rel="SHORTCUT ICON" href="/favicon.ico" />
+<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
+<meta http-equiv="Content-Language" content="ru">
+<meta name="Keywords" content="��������� ������������, ������, �����, �����, ����, ��������,  �������,  ���������, �����, �������, �������, �������, ���, ������, �������, ������������, ����������, �������, ������������, ������, ������, ����">
+<meta name="Description" content="��������� ������������ "��������� ������������", "������� ����" ������. ������ � ������� ������������ � ����� ������������ � ������ � �����������: �������, ���������, ����������������, �������� � ������ ������� ���������, ��������, �������, ��������, ����, ����. ������ �������, ������, ���������. ����� ��������. ����� ������. ������ ����.">
+<meta http-equiv="description" content="��������� ������������ "��������� ������������", "������� ����" ������. ������ � ������� ������������ � ����� ������������ � ������ � �����������: �������, ���������, ����������������, �������� � ������ ������� ���������, ��������, �������, ��������, ����, ����. ������ �������, ������, ���������. ����� ��������. ����� ������. ������ ����.">
+<meta name="revisit" content="7 days">
+<meta name='yandex-verification' content='77a043af80883202' />
+
+<link rel="stylesheet" href="continent.css" type="text/css">
+</head>
+<body bgcolor="#FFFFFF" text="#000000" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">
+<table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
+  <tr>
+    <td height="10"> 
+      <noindex><table width="100%" border="0" cellspacing="0" cellpadding="0">
+        <tr>
+          <td><a title="������ ������� ������� ������" href="/default.asp"><img src="imgs/logo2.gif" Alt="������ ������� �������, ������, ������, �������, ���������" width="205" height="68" style="margin-top:13px; margin-bottom:3px; margin-left:13px;" border=0></a></td>
+          <td align=center valign=bottom>
+          
+            <a href='http://office.realty-guide.ru/rot/?key=289' target=_blank><img src='/imgs/banners/ban32.gif' border=0 width=500 height=75></a>
+          
+          </td>
+        </tr>
+      </table></noindex>
+    </td>
+  </tr>
+  <tr>
+    <td valign="top" height="100%"> 
+      <table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
+        <tr>
+          <td width="228" bgcolor="#546154" valign="top" align=center> 
+            <table width="100%" border="0" cellspacing="0" cellpadding="0" height=402>
+              <tr> 
+                <td height="147" background="imgs/hd_bg2.gif" valign="top"><img src="imgs/h_fl.jpg" width="202" height="136" style="margin-top: 10px; margin-left: 14px;" alt="������ ������� �������, ������, ������, �������, ���������"></td>
+              </tr>
+              <tr> 
+                <td height="255" valign="top">
+                  <OBJECT classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000"
+ codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,0,0"
+ WIDTH="228" HEIGHT="250" id="menu10" ALIGN="">
+ <PARAM NAME=movie VALUE="menu10.swf"> <PARAM NAME=quality VALUE=high> <PARAM NAME=bgcolor VALUE=#525E52> <EMBED src="menu10.swf" quality=high bgcolor=#525E52  WIDTH="228" HEIGHT="250" NAME="menu10" ALIGN=""
+ TYPE="application/x-shockwave-flash" PLUGINSPAGE="http://www.macromedia.com/go/getflashplayer"></EMBED>
+</OBJECT>
+                </td>
+              </tr>
+            </table>
+<a href="/kommvip.asp"><img width=169 height=114 src="/imgs/vipbanner3.gif" border=0 alt="� ������ ������� �� ������ ������������ �� ������������� ��� �������� �� ������ ������������ ������������: ������ ������, �������, ���������, ����, ����������, ��������� ���������� ���������� � �.�., ������������ ������� ��������� ������������ ������� � ���������� ���������-������������"></a>
+<br>
+<br>
+<a href="/arendavip.asp"><img width=169 height=114 src="/imgs/vipbanner_arenda.jpg" border=0 alt="� ������ ������� �� ������ ������������ �� ������������ ������������� �� ������ �����: ������ �������, ������ ���������, ������ ���, ������ ����� � �.�., ������������ ������� ��������� ������������ ������� � ���������� ���������-������������"></a>
+<br>
+<br>
+<noindex><a target=_blank title="���������� ������� ������" href="http://www.lagunadom.ru"><img width=169 height=114 src="/ban/ban_169_114.gif" border=0 alt="���������� ������� ������"></a></noindex>
+<br>
+<br>
+<br>
+<br>
+<noindex><!--a target=_blank title="������������� ����, ���������� ����, ����, ������������� ���, ���������� ��� - ��������-������� ���������� ����" href="http://www.nyelki.ru"><img width=169 height=94 src="/imgs/banner.jpg" border=0 alt="������������� ����, ���������� ����, ����, ������������� ���, ���������� ��� - ��������-������� ���������� ����"></a>
+<br>
+<br>
+<br>
+<br-->
+<!-- Yandex.Metrika -->
+<script src="//mc.yandex.ru/resource/watch.js" type="text/javascript"></script>
+<script type="text/javascript">
+try { var yaCounter177293 = new Ya.Metrika(177293); } catch(e){}
+</script>
+<noscript><div style="position: absolute;"><img src="//mc.yandex.ru/watch/177293" alt="" /></div></noscript>
+<!-- Yandex.Metrika -->
+<!--Rating@Mail.ru COUNTER--><script language="JavaScript" type="text/javascript"><!--
+d=document;var a='';a+=';r='+escape(d.referrer)
+js=10//--></script><script language="JavaScript1.1" type="text/javascript"><!--
+a+=';j='+navigator.javaEnabled()
+js=11//--></script><script language="JavaScript1.2" type="text/javascript"><!--
+s=screen;a+=';s='+s.width+'*'+s.height
+a+=';d='+(s.colorDepth?s.colorDepth:s.pixelDepth)
+js=12//--></script><script language="JavaScript1.3" type="text/javascript"><!--
+js=13//--></script><script language="JavaScript" type="text/javascript"><!--
+d.write('<a target=_blank href="http://top.mail.ru/jump?from=782596"'+
+' target=_top><img src="http://top.list.ru/counter'+
+'?id=782596;t=54;js='+js+a+';rand='+Math.random()+
+'" alt="�������@Mail.ru"'+' border=0 height=31 width=88/><\/a>')
+if(11<js)d.write('<'+'!-- ')//--></script><noscript><a
+target=_blank href="http://top.mail.ru/jump?from=782596"><img
+src="http://top.list.ru/counter?js=na;id=782596;t=54"
+border=0 height=31 width=88
+alt="�������@Mail.ru"/></a></noscript><script language="JavaScript" type="text/javascript"><!--
+if(11<js)d.write('--'+'>')//--></script><!--/COUNTER--></noindex>
+<br>
+ <br><br>
+          </td>
+          <td valign="top" bgcolor="#546154" height="100%"> 
+            <table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
+              <tr>
+                <td height="4" background="imgs/hd_bg1.gif" align="right" valign="top" style="padding-right:13px; font-size:4px;">&nbsp;</td>
+              </tr>
+              <tr>
+                <td valign="top" style="padding-right:13px;" height="20" align=right background="imgs/hd_bg1n.gif">
+                  <table border=0 cellspacing=0 cellpadding=0 height=20>
+                  <tr>
+
+                    <td><img src="/imgs/tabl1_p.gif" height=20></td>
+                    <td valign=bottom background="/imgs/tabl2_p.gif"><div style="padding-bottom:2px;"><a style="color:#000000; text-decoration:none;" href="/basket.asp">�������</a></td>
+                    <td><img src="/imgs/tablr_pa.gif" height=20></td>
+                    <td valign=bottom background="/imgs/tabl2_a.gif"><div style="padding-bottom:2px; font-weight:bold; text-transform:uppercase;">���������-������������</div></div></td>
+                    <td><img src="/imgs/tabl3_a.gif" height=20></td>
+
+                  </tr>
+                  </table>
+                </td>
+              </tr>
+              <tr>
+                <td valign="top" style="padding-bottom:13px;padding-right:13px;" height="100%">
+
+<style>
+a:link {  color: #000000; text-decoration: none;}
+a:visited {  color: #000000; text-decoration: none;}
+a:active {  color: #000000; text-decoration: none;}
+a:hover {  color: #1FB21F; text-decoration: underline;}
+h2 { margin:0px; padding:0px; font-weight: normal; font-size: 8pt; text-decoration:none;}
+</style>
+<table width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor=#FFFFFF>
+<tr>
+  <td valign=top align=left><img src="/imgs/fp2.gif" width=37 height=31></td>
+  <td valign=top align=right><img src="/imgs/fp1.gif" width=257 height=24></td>
+</tr>
+</table>
+<table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+<tr>
+  <td valign=top>
+    <table width="100%" border="0" cellspacing="1" cellpadding="0" bgcolor=#FFFFFF>
+    <tr>
+      <td width=12 valign=top><img src="/imgs/fp_li2.gif" width=8 height=15></td>
+      <td>
+        <h1>���������-������������:</h1>
+<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;"><b>��������� ������������ "���������-������������"</b>, �������� � 1999 ����, ������������ ����� ������� �������������� �� ����� ������������ �. ������, ������� ������������ ������������ ������������ � �������������� ������ � ������� �������.</p>
+<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">�������� "<b>���������-������������</b>" ���������� ���������� ������ ������ ���� ������������ �������� "������ � ������ � �����������" �� �������� ������� � ������������ ��������������� ���������������� ����� � ������.</p>
+<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">�� ���������� ��������� <b>����������� ������</b>:</p>
+<ul style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">
+<li><b>������ � ������� ������������ ������������ � ������ � �����������</b>: �������, ���������, ����������������, �������� � ������ ������� ���������.
+<li><b>�������, ������ ������ � ������-�������</b>, ������ ������ �����, ������ ����� ��� ��������.
+<li><b>������ � ������� ����� ������������ � ������</b>: ��������, �������.
+<li><b>������ � ������� ���������� ������������ � �����������</b>: ��������, ����, ����.
+<li><b>����������� ������������� ������ �� ������ � �����-������� ����� � ������� ���������</b>.
+<li><b>���������� � ����������� �������������������� ����������</b>.
+<li><b>������������� ���������� �������������</b>.
+</ul>
+<br>&nbsp;
+      </td>
+    </tr>
+    </table>
+    <table width="100%" border="0" cellspacing="0"  style="padding-left:12px;" cellpadding="0" bgcolor=#FFFFFF>
+    <tr>
+      <td valign=top width=50%><h1 style="color:red">������ ����� ������������</h1></td>
+      <td valign=top width=50%><h1 style="color:red">������ ������������ ������������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>������ ������� � ������</h1></td>
+      <td valign=top><br><h1>������ ������ � ���������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ������� � ������" border=0 class=img1 src="/imgs/fp_i1.jpg"></td>
+          <td valign=top class=fp_small>����� �������� � ������ ���� ��������� ������������ ������� ������ � �������. 150 ����������� ����� �������� ���������. ���� �� ������ ������� ����������� ������ ���.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ �������" href="arenda_all.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �������</h2></a></div>
+            <a title="�������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ������ � ���������" border=0 class=img1 src="/imgs/fp_i2.jpg"></td>
+          <td valign=top class=fp_small>������ ������. ����� ������� ��������� � ������. ����� 2000 ��������� ������ � ������. 100 ����� ����������� ����� ���� ������ ����. ���� �� ������ ������ ����������� ��������. ���� ������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ������" href="komm.asp?kommtype_id=1&kommtype_id=8"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������</h2></a></div>
+            <a title="����� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������ � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>������ ������ � ������</h1></td>
+      <td valign=top><br><h1>������ ������� � ������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ������ � ������" border=0 class=img1 src="/imgs/fp_i3.jpg"></td>
+          <td valign=top class=fp_small>������ ������ � ����� ������ ������ �� 1 ���� � �������� �������. � ��� ����� ����� ��������� � ������ ������� � ������������ ��������. ������ ����� �������? ������ �������� ������!</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ������" href="arenda_all.asp?roomamount=-1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������</h2></a></div>
+            <a title="������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������ � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ������� � ������" border=0 class=img1 src="/imgs/fp_i4.jpg"></td>
+          <td valign=top class=fp_small>����� ����� � ������ ��� �����������. �� ����� ����� �� ������ ����� ��������� ����������� �� ������ ��������� ��������� � ��������. ���� �������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ �������" href="komm.asp?kommtype_id=2"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �������</h2></a></div>
+            <a title="������ � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>������ ������� �������. ���� �������.</h1></td>
+      <td valign=top><br><h1>������ ���������������� ���������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ������� �������" border=0 class=img1 src="/imgs/fp_i5.jpg"></td>
+          <td valign=top class=fp_small>��� ���, ��� ����� ����� ������� �������� ��� �������� � ������. � ��� �� ����� ����� 1000 �������� ������� ������������ � ������. ���� �������. ����� ������� ��������? �����������, �� �������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ �������" href="arenda_all.asp?elit=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������� �������</h2></a></div>
+            <a title="�������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ���������������� ���������" border=0 class=img1 src="/imgs/fp_i6.jpg"></td>
+          <td valign=top class=fp_small>��� ���, ��� ����� ����� ��� ����� ������������ � ������ ��� �����������. � ��� �� ���� �� ������ ����� ������� ����� ��������� ��� ������������ . ���� ���������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ �����������" href="komm.asp?kommtype_id=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �����������</h2></a></div>
+            <a title="������������ � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>���������� ������ �������</h1></td>
+      <td valign=top><br><h1>������ ���������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="���������� ������ �������" border=0 class=img1 src="/imgs/fp_i7.jpg"></td>
+          <td valign=top class=fp_small>������ �������, ��������������� � ������ �������� ���������, ��������� ������������ ���������� ����� �������� � ������ ���������. ���� ������� � ���������� ������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ �������" href="arendaday_results.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ���������� ������ �������</h2></a></div>
+            <a title="�������� � ������" href="arendaday_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ���������� ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ���������" border=0 class=img1 src="/imgs/fp_i8.jpg"></td>
+          <td valign=top class=fp_small>��� ���, ��� ����� ����� ��� ����� �������. �� ���������� ������� ����� �������� ��������� � �������� � �������� ������� ������. ���� ���������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ���������" href="komm.asp?kommtype_id=3"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ���������</h2></a></div>
+            <a title="�������� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>������ ��������� � ��� � �����������</h1></td>
+      <td valign=top><br><h1>������ ��������� ��� ��������� � ����</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ��������� � ���" border=0 class=img1 src="/imgs/fp_i9.jpg"></td>
+          <td valign=top class=fp_small>���, ���� ���������� ������ �������� ��� ����������� ���� � �����������, ��������� ������������ ���������� ������� ����� ���������� ������������ . ����� ��� ����� ������� � ���� ��� ������. ����.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ��������� ���" href="arenda_cottage.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ������ ���������, ���</h2></a></div>
+            <a title="�������� ���� � ������" href="cottage_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ���������, ���, ����� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ���������� � ����" border=0 class=img1 src="/imgs/fp_i10.jpg"></td>
+          <td valign=top class=fp_small>������ ����� ��������� ��� ��������, ��� ��� ����. �� ���� ����� �� ������ ����� ����������� �� ������ ������������ ������������ ��� ������������ ������� � ����. ����� ��� ����� ��������, ����, ��� � ������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ���������� ����" href="komm.asp?kommtype_id=5&kommtype_id=6"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ����������, ����</h2></a></div>
+            <a title="��������� � ���� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ���������� � ���� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td valign=top><br><h1>���������� ������ ��������� � �����������</h1></td>
+      <td valign=top><br><h1>������ ��������� ���������� ����������</h1></td>
+    </tr>
+    <tr>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="���������� ������ ���������" border=0 class=img1 src="/imgs/fp_i13.jpg"></td>
+          <td valign=top class=fp_small>�� ������ �������� �������� ��� ��������� � ���������� ����? ���� ��������� ������������ ���������� ����� ������� ���������. ����  ���������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ���������" href="arenda_cottageday.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ���������� ������ ���������</h2></a></div>
+            <a title="�������� � ������" href="cottageday_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� � ���������� ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+      <td valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������ ��������� ���������� ����������" border=0 class=img1 src="/imgs/fp_i15.jpg"></td>
+          <td valign=top class=fp_small>����� ��������� ���������� ����������. ������� ����������� ����� ��� � ������. ���� �� ������������ ������������ ����������� ���������. ����.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������ ���������" href="komm.asp?kommtype_id=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ��������� ���������� ����������</h2></a></div>
+            <a title="��������� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� ���������� ���������� � ������</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td colspan=2 valign=top><br><h1 style="color:red">������� ������������ ������������</h1></td>
+    </tr>
+    <tr>
+      <td colspan=2 align=center valign=top>
+        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+        <tr>
+          <td valign=top width=60><img width=60 height=60 alt="������� ������������ ������������" border=0 class=img1 src="/imgs/fp_i14.jpg"></td>
+          <td valign=top class=fp_small>���� �� ������ ������ ��������� ��� �������: ����, �������, �����, ������������, �� �� ������ ������������ � ������������� �� ������� ������������ ������������ ��� ������� ���� ������ �� ������� ��������� � ������. ����-������� ������������ �� �������.</td>
+        </tr>
+        <tr>
+          <td colspan=2>
+            <a title="������� ������" href="kommP.asp?kommtype_id=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ������</h2></a></div>
+            <a title="������� �������" href="kommP.asp?kommtype_id=2"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� �������</h2></a></div>
+            <a title="������� ���������" href="kommP.asp?kommtype_id=3"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���������</h2></a></div>
+            <a title="������� ����������" href="kommP.asp?kommtype_id=5"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ����������</h2></a></div>
+            <a title="������� ����" href="kommP.asp?kommtype_id=6"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ����</h2></a></div>
+            <a title="������� �����������" href="kommP.asp?kommtype_id=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���������������� ���������</h2></a></div>
+            <a title="������� ���������" href="kommP.asp?kommtype_id=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���</h2></a></div>
+          </td>
+        </tr>
+        </table>
+      </td>
+    </tr>
+    </table>
+    <table width="100%" border="0" cellspacing="1" cellpadding="0" bgcolor=#FFFFFF>
+    <tr>
+      <td width=12 valign=top><img src="/imgs/fp_li2.gif" width=8 height=15></td>
+      <td>
+        <h1>������� ������������. ������:</h1>
+        <br>
+
+    <li><a href="/news.asp?id=69&curr=1"><h2>��������� �� ������ - ������� �������� ������������</h2></a>
+
+    <li><a href="/news.asp?id=68&curr=1"><h2>������ ��������!</h2></a>
+
+    <li><a href="/news.asp?id=67&curr=1"><h2>��� ������ ����������, ���� ����� ����������� �������� �������� �����?</h2></a>
+
+    <li><a href="/news.asp?id=66&curr=1"><h2>5 �������� ����� ������� �������� � ������</h2></a>
+
+    <li><a href="/news.asp?id=65&curr=1"><h2>���� ������� � ����������: ���� �������� � ���?</h2></a>
+
+    <li><a title="������� ������������" href="news.asp"><h2><b>������ ������� ������������...</b></h2></a>
+    <br>
+      </td>
+    </tr>
+    </table>
+  </td>
+  <td width=5>&nbsp;</td>
+  <td valign=top width=300>
+    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ����������� <font color=red>��� ��������</font>:</h1>
+    <br>
+
+
+<table cellspacing=0 cellpadding=0 border=0 width=100%>
+<tr>
+  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
+  <td valign=top width=100%  bgcolor=white>
+    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
+    <tr>
+      <td width=1 valign=middle><img src='imgs/knop1.gif'></td>
+      <td class=text bgcolor=white valign=middle>
+          <a href="/arendaview_komm.asp?anketa_id=148110" class=menubig><b>������ ������</b></a>
+      </td>
+    </tr>
+    </table>
+    <table width=100% cellspacing=0 cellpadding=3 border=0>
+    <tr>
+      <td width=128 valign=top align=right nowrap>
+        <a href="/arendaview_komm.asp?anketa_id=148110"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos5/s_k_67491.jpg" border=0 alt="������ ������"></a>
+      </td>
+      <td valign=top nowrap style='padding-left:6px;'>
+        <a href='/arendaview_komm.asp?anketa_id=148110' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>���������� �.</b></p><p class='viprow'>7 �� �� ����</p><p class='viprow'>2100 - 2500 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>130 $/��.�./���</b></p></a>
+      </td>
+    </tr>
+    </table>
+    &nbsp;
+  </td>
+</tr>
+<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
+<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
+</table>
+    <br>
+
+
+<table cellspacing=0 cellpadding=0 border=0 width=300>
+<tr>
+  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
+  <td valign=top width=100%  bgcolor=white>
+    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
+    <tr>
+      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
+      <td class=text bgcolor=white valign=middle>
+          <a href="/arendaview_kommp.asp?anketa_id=167792" class=menubig><b>������� �����</b></a>
+      </td>
+    </tr>
+    </table>
+    <table width=100% cellspacing=0 cellpadding=3 border=0>
+    <tr>
+      <td width=128 valign=top align=right nowrap>
+        <a href="/arendaview_kommp.asp?anketa_id=167792"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos5/s_kp_96026.jpg" border=0 alt="������� �����"></a>
+      </td>
+      <td valign=top nowrap style='padding-left:6px;'>
+        <a href='/arendaview_kommp.asp?anketa_id=167792' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>�. ���������� �������</b></p><p class='viprow'>918 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>7&nbsp;344&nbsp;000 $</b></p></a>
+      </td>
+    </tr>
+    </table>
+    &nbsp;
+  </td>
+</tr>
+<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
+<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
+</table>
+    <br>
+
+<table cellspacing=0 cellpadding=0 border=0 width=100%>
+<tr>
+  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
+  <td valign=top width=100%  bgcolor=white>
+    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
+    <tr>
+      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
+      <td class=text bgcolor=white valign=middle>
+          <a href="/arendaview_all.asp?anketa_id=160328" class=menubig><b>������ 2-����. ��������</b></a>
+      </td>
+    </tr>
+    </table>
+    <table width=100% cellspacing=0 cellpadding=3 border=0>
+    <tr>
+      <td width=128 valign=top align=right nowrap>
+            <a href="/arendaview_all.asp?anketa_id=160328"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos1/s_ae_45253.jpg" border=0 alt="������ 2-����. ��������"></a>
+      </td>
+      <td valign=top nowrap style='padding-left:6px;'>
+        <a href='/arendaview_all.asp?anketa_id=160328' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>�. ��������</b></p><p class='viprow'>10 ����� ������ �� �����</p><p class='viprow'>��. ��������, ��� 25</p><p class='viprow'>����� ������� 60 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>100 000 $/���</b></p></a>
+      </td>
+    </tr>
+    </table>
+    &nbsp;
+  </td>
+</tr>
+<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
+<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
+</table>
+<br>
+
+
+<table cellspacing=0 cellpadding=0 border=0 width=100%>
+<tr>
+  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
+  <td valign=top width=100%  bgcolor=white>
+    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
+    <tr>
+      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
+      <td class=text bgcolor=white valign=middle>
+          <a href="/arendaview_cottage.asp?anketa_id=1761" class=menubig><b>������ ��������</b></a>
+      </td>
+    </tr>
+    </table>
+    <table width=100% cellspacing=0 cellpadding=3 border=0>
+    <tr>
+      <td width=128 valign=top align=right nowrap>
+        <a href="/arendaview_cottage.asp?anketa_id=1761"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos61/vipcot1761.jpg" border=0 alt="������ ��������"></a>
+      </td>
+      <td valign=top nowrap style='padding-left:6px;'>
+        <a href='/arendaview_cottage.asp?anketa_id=1761' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>������������ �.</b></p><p class='viprow'>15 �� �� ����</p><p class='viprow'>520 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>465 000 $/���</b></p></a>
+      </td>
+    </tr>
+    </table>
+    &nbsp;
+  </td>
+</tr>
+<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
+<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
+</table>
+<br>
+
+    <div class=ns><a title="������ ������������" href="kommvip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ������������ ������������ ��� ��������</h2></a></div>
+    <div class=ns><a title="������ ������������ " href="kommvipp.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������� ������������ ������������ ��� ��������</h2></a></div>
+    <div class=ns><a title="������ �������" href="arendavip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ������� ��� ��������</h2></a></div>
+    <div class=ns><a title="������ ������� " href="arendacotvip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ���������, ���, ����� ��� ��������</h2></a></div>
+    <br>
+    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> �������� ������:</h1>
+    <p style="font-family:Times New Roman; font-size:12px; margin-top:10px; margin-bottom:0px;">
+    <b>����������</b> ������������, �������� ������� ��� ����� � ������, �� ������� ������ ������� ��� ����� ��������� �� �������� ��������. �� ������ �������� ������ ��� ��������� � ���� �� ��������.
+<br><i>������������ �� �������� ������ � ������� ������������ ���������</i>.
+</p>
+    <br>
+    <div class=ns><a target=_blank title="����� �������� " href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������� " href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������� ��������" href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ��������</h2></a></div>
+    <div class=ns><a target=_blank title="����� �������� ���������" href="form_1day.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������� ���������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ��������, ����, ����" href="form_5s.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ���, ����</h2></a></div>
+    <div class=ns><a target=_blank title="����� �����" href="form_1off.asp?kommtypeid=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������ � ������������" href="form_1off.asp?kommtypeid=2&kommtypeid=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �����, ������������</h2></a></div>
+    <div class=ns nowrap><a target=_blank title="����� ��������" href="form_1off.asp?kommtypeid=3&kommtypeid=5&kommtypeid=6&kommtypeid=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ��������, ����</h2></a></div>
+    <div class=ns><a target=_blank title="������� ����" href="form_6s.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� �������, ����, ���</h2></a></div>
+    <div class=ns><a target=_blank title="������� �����" href="form_1off.asp?kommtypeid=1&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� ����</h2></a></div>
+    <div class=ns><a target=_blank title="������� ������" href="form_1off.asp?kommtypeid=2&kommtypeid=4&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� �����, ������������</h2></a></div>
+    <div class=ns><a target=_blank title="������� ���������" href="form_1off.asp?kommtypeid=1&kommtypeid=2&kommtypeid=3&kommtypeid=4&kommtypeid=5&kommtypeid=6&kommtypeid=7&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� ������� ���������</h2></a></div>
+    <br>
+    <p style="font-family:Times New Roman; font-size:12px; margin-top:10px; margin-bottom:0px;">
+    <b>��������.</b> ���� �� ������ ����� � ������ ��������, �������, ����, �����, �������... ���� ���� ��� ���������� ������� ������������, �������� ������ � �� ������� ��� ����� ��� ������ ��������� �������, ������� � � ����������� �����. �� ������ ������������ � ������������� �� ������ � ������� ������������ �� ����� �����. ���� ����������� ���������. ��� ������������ ���������
 ��� �������������� ��������� ������ �������������.
+<br><i>��������! �� �� ����� ����������, �� ��������� �������������� �����, �������� ��������� � ������� ���������, ������ ������������ �� ����� ������.</i>
+    </p>
+    <br>
+    <div class=ns><a target=_blank title="����� �������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������� �������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ��������</h2></a></div>
+    <div class=ns><a target=_blank title="����� �������� ���������" href="form_3day.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������� ���������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ����" href="form_5.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ���, ����</h2></a></div>
+    <div class=ns><a target=_blank title="����� ���� " href="form_3off.asp?kommtypeid=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����</h2></a></div>
+    <div class=ns><a target=_blank title="����� ����� " href="form_3off.asp?kommtypeid=2&kommtypeid=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �����, ������������</h2></a></div>
+    <div class=ns><a target=_blank title="����� ������� " href="form_3off.asp?kommtypeid=3&kommtypeid=5&kommtypeid=6&kommtypeid=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ��������, ����</h2></a></div>
+    <div class=ns><a target=_blank title="������ �������, ���� " href="form_6.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ �������, ����, ���</h2></a></div>
+    <div class=ns><a target=_blank title="������ ���� " href="form_3off.asp?kommtypeid=1&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ����</h2></a></div>
+    <div class=ns><a target=_blank title="������ ����� " href="form_3off.asp?kommtypeid=2&kommtypeid=4&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ �����, ������������</h2></a></div>
+    <div class=ns><a target=_blank title="������ ��������� " href="form_3off.asp?kommtypeid=1&kommtypeid=2&kommtypeid=3&kommtypeid=4&kommtypeid=5&kommtypeid=6&kommtypeid=7&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� ���������</h2></a></div>
+    <br>
+    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ���������� ����� ���������� �� ������ ������������:</h1>
+    <br>
+    <div class=ns><a title="������ �������" href="freetables.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� � ������</h2></a></div>
+    <div class=ns><a title="������ ���������" href="freetables_komm.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� ���������</h2></a></div>
+    <div class=ns><a title="������ ��������� ���" href="freetables_cott.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ���������, ���, �����</h2></a></div>
+    <br>
+    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ��������:</h1>
+    <br>
+    <div class=ns><a href="vakansii.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �� ������ ������������ ������������</h2></a></div>
+    <div class=ns><a href="vakansii.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �� ������ ������� � ������</h2></a></div>
+    <br>
+  </td>
+</tr>
+</table>
+<table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
+<tr>
+  <td bgcolor="#FFFFFF" colspan=2 valign="top" style="padding-top: 5px; padding-right: 5px; padding-bottom: 5px; padding-left: 7px">
+    <br>
+<div align=center>
+<a style="font-size:10px;" href="/default.asp">�������</a> ::
+<a style="font-size:10px;" href="/arenda_results.asp">������ �����</a> ::  
+<a style="font-size:10px;" href="/prodaga.asp">�������/������� �����</a> ::  
+<a style="font-size:10px;" href="/komm.asp">������������ ������������</a> ::  
+<a style="font-size:10px;" href="/nedvvrossii.asp">������������ � ������</a> ::  
+<a style="font-size:10px;" href="/docs.asp">���������� ����������</a> ::  
+<a style="font-size:10px;" href="/zemuchastki.asp">��������� �������</a> ::  
+<a style="font-size:10px;" href="/vakansii.asp">��������</a> ::  
+<a style="font-size:10px;" href="/questions.asp">������� ��������</a> ::
+<a style="font-size:10px;" href="/info.asp">���������� ����������</a> ::  
+<a style="font-size:10px;" href="/freetables.asp">����� ���������� �� ������������</a> ::  
+<a style="font-size:10px;" href="/links.asp">������� ������</a> ::  
+<a style="font-size:10px;" href="/kontakty.asp">��������</a>
+</div> 
+
+  </td>
+</tr>
+</table>
+</td>
+              </tr>
+            </table>
+          </td>
+        </tr>
+      </table>
+    </td>
+  </tr>
+    <tr>
+    <td height="20" style="padding-left:13px; padding-right:13px;">
+<table width="100%" border="0" cellspacing="0" cellpadding="0">
+  <tr>
+    <td class=copy>
+    &copy; 2001 � 2009 <a title="�������� ������������" href="/">��������� ������������</a> "���������-������������", "������� ����" -  ������ �������, ������ ������, ������ ���������.<br>
+    ���.: +7 495 737-7019&nbsp;&nbsp;&nbsp;����: +7 495 231-7755&nbsp;&nbsp;&nbsp;E-mail: <a href="mailto:info1@makler.su" style="color:black">info1@makler.su</a><br>
+</td>
+  </tr>
+</table>
+    </td>
+  </tr>
+</table><script type="text/javascript">
+var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+</script>
+<script type="text/javascript">
+try {
+var pageTracker = _gat._getTracker("UA-8971199-1");
+pageTracker._trackPageview();
+} catch(err) {}</script></body>
+</html>
+
+


[10/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-app/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
new file mode 100644
index 0000000..0ed428e
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -0,0 +1,312 @@
+package org.apache.tika.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.parser.digesting.CommonsDigester;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.apache.tika.sax.ContentHandlerFactory;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class RecursiveParserWrapperTest {
+
+    @Test
+    public void testBasicXML() throws Exception {
+        // XML output: the empty header paragraph is expected in self-closed form
+        // (not much else differentiates html from xml in this test file).
+        List<Metadata> results = getMetadata(new Metadata(),
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
+        String xml = results.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+        assertTrue(xml.contains("<p class=\"header\" />"));
+    }
+
+    @Test
+    public void testBasicHTML() throws Exception {
+        // HTML output: the empty header paragraph is expected with an explicit end tag
+        // (not much else differentiates html from xml in this test file).
+        List<Metadata> results = getMetadata(new Metadata(),
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.HTML, -1));
+        String html = results.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+        assertTrue(html.contains("<p class=\"header\"></p>"));
+    }
+
+    @Test
+    public void testBasicText() throws Exception {
+        // TEXT output: no "<p " markup may appear, but the text "embed_0" must be present.
+        List<Metadata> results = getMetadata(new Metadata(),
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
+        String text = results.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+        assertEquals(-1, text.indexOf("<p "));
+        assertTrue(text.contains("embed_0"));
+    }
+
+    @Test
+    public void testIgnoreContent() throws Exception {
+        // IGNORE handler type: the first result must carry no TIKA_CONTENT value at all.
+        List<Metadata> results = getMetadata(new Metadata(),
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
+        Metadata first = results.get(0);
+        assertNull(first.get(RecursiveParserWrapper.TIKA_CONTENT));
+    }
+
+
+    @Test
+    public void testCharLimit() throws Exception {
+        // The factory is built with 60 as its second argument (presumably the character write
+        // limit); exactly one of the five results is expected to flag WRITE_LIMIT_REACHED.
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(new AutoDetectParser(),
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 60));
+        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
+                "/test-documents/test_recursive_embedded.docx");
+        try {
+            wrapper.parse(stream, new DefaultHandler(), new Metadata(), new ParseContext());
+        } finally {
+            // release the resource stream even if parsing fails
+            IOUtils.closeQuietly(stream);
+        }
+        List<Metadata> list = wrapper.getMetadata();
+        assertEquals(5, list.size());
+
+        int wlr = 0;
+        for (Metadata m : list) {
+            if ("true".equals(m.get(RecursiveParserWrapper.WRITE_LIMIT_REACHED))) {
+                wlr++;
+            }
+        }
+        assertEquals(1, wlr);
+    }
+
+    @Test
+    public void testMaxEmbedded() throws Exception {
+        // Parses the same document three times through one wrapper: with the default
+        // embedded-resource limit, with a limit of maxEmbedded, and with a negative limit,
+        // which is expected to give the same results as the default.
+        int maxEmbedded = 4;
+        int totalNoLimit = 12;//including outer container file
+        String docPath = "/test-documents/test_recursive_embedded.docx";
+
+        ParseContext context = new ParseContext();
+        Parser wrapped = new AutoDetectParser();
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
+
+        //test default
+        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(docPath);
+        try {
+            wrapper.parse(stream, new DefaultHandler(), new Metadata(), context);
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        List<Metadata> list = wrapper.getMetadata();
+        assertEquals(totalNoLimit, list.size());
+        assertNull(list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED));
+        wrapper.reset();
+
+        //test setting value
+        wrapper.setMaxEmbeddedResources(maxEmbedded);
+        stream = RecursiveParserWrapperTest.class.getResourceAsStream(docPath);
+        try {
+            wrapper.parse(stream, new DefaultHandler(), new Metadata(), context);
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        list = wrapper.getMetadata();
+        //add 1 for outer container file
+        assertEquals(maxEmbedded + 1, list.size());
+        assertEquals("true",
+                list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED));
+        wrapper.reset();
+
+        //test setting value < 0
+        wrapper.setMaxEmbeddedResources(-2);
+        stream = RecursiveParserWrapperTest.class.getResourceAsStream(docPath);
+        try {
+            wrapper.parse(stream, new DefaultHandler(), new Metadata(), context);
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        // re-read the results so the assertions cover this run, not the previous run's list
+        list = wrapper.getMetadata();
+        assertEquals(totalNoLimit, list.size());
+        assertNull(list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED));
+    }
+
+    @Test
+    public void testEmbeddedResourcePath() throws Exception {
+        // The non-null EMBEDDED_RESOURCE_PATH values of all results must equal this set.
+        Set<String> expected = new HashSet<String>();
+        expected.add("/image1.emf");
+        expected.add("/embed1.zip");
+        expected.add("/embed1.zip/embed1a.txt");
+        expected.add("/embed1.zip/embed1b.txt");
+        expected.add("/embed1.zip/embed2.zip");
+        expected.add("/embed1.zip/embed2.zip/embed2a.txt");
+        expected.add("/embed1.zip/embed2.zip/embed2b.txt");
+        expected.add("/embed1.zip/embed2.zip/embed3.zip");
+        expected.add("/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
+        expected.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
+        expected.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
+
+        Metadata request = new Metadata();
+        request.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
+        List<Metadata> results = getMetadata(request,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
+        String xml = results.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+        assertTrue(xml.contains("<p class=\"header\" />"));
+
+        Set<String> seen = new HashSet<String>();
+        for (Metadata m : results) {
+            String path = m.get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH);
+            if (path == null) {
+                continue;
+            }
+            seen.add(path);
+        }
+        assertEquals(expected, seen);
+    }
+
+    @Test
+    public void testEmbeddedNPE() throws Exception {
+        String docName = "test_recursive_embedded_npe.docx";
+        //default behavior (user doesn't specify whether or not to catch embedded exceptions)
+        //is to catch the exception
+        Metadata caughtRequest = new Metadata();
+        caughtRequest.set(Metadata.RESOURCE_NAME_KEY, docName);
+        List<Metadata> caught = getMetadata(caughtRequest,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
+        assertEquals(13, caught.size());
+        assertContains("java.lang.NullPointerException",
+                caught.get(10).get(RecursiveParserWrapper.EMBEDDED_EXCEPTION));
+
+        //Composite parser swallows caught TikaExceptions, IOExceptions and SAXExceptions
+        //and just doesn't bother to report that there was an exception.
+        Metadata uncaughtRequest = new Metadata();
+        uncaughtRequest.set(Metadata.RESOURCE_NAME_KEY, docName);
+        List<Metadata> uncaught = getMetadata(uncaughtRequest,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
+                false, null);
+        assertEquals(12, uncaught.size());
+    }
+
+    @Test
+    public void testPrimaryExcWEmbedded() throws Exception {
+        //if embedded content is handled and then
+        //the parser hits an exception in the container document,
+        //test that the first element of the returned list is the container document
+        //and the second is the embedded content
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
+
+        ParseContext context = new ParseContext();
+        Parser wrapped = new AutoDetectParser();
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1), true);
+
+        boolean npe = false;
+        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
+                "/test-documents/mock/embedded_then_npe.xml");
+        try {
+            wrapper.parse(stream, new DefaultHandler(), metadata, context);
+        } catch (TikaException e) {
+            // instanceof is null-safe: a TikaException without a cause must fail the
+            // assertion below instead of raising an unrelated NullPointerException here
+            npe = e.getCause() instanceof NullPointerException;
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        assertTrue("npe", npe);
+
+        //the wrapper must still expose both documents after the failed parse
+        List<Metadata> metadataList = wrapper.getMetadata();
+        assertEquals(2, metadataList.size());
+
+        Metadata outerMetadata = metadataList.get(0);
+        assertContains("main_content", outerMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertEquals("embedded_then_npe.xml", outerMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
+        assertEquals("Nikolai Lobachevsky", outerMetadata.get("author"));
+
+        Metadata embeddedMetadata = metadataList.get(1);
+        assertContains("some_embedded_content", embeddedMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertEquals("embed1.xml", embeddedMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
+        assertEquals("embeddedAuthor", embeddedMetadata.get("author"));
+    }
+
+    @Test
+    public void testDigesters() throws Exception {
+        // Passes an MD5 CommonsDigester to getMetadata and checks the value stored under
+        // "X-TIKA:digest:MD5" for entries 0, 6 and 7 of the returned list.
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
+        List<Metadata> list = getMetadata(metadata,
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
+                true, new CommonsDigester(100000, CommonsDigester.DigestAlgorithm.MD5));
+
+        String md5Key = "X-TIKA:digest:MD5";
+        assertEquals("59f626e09a8c16ab6dbc2800c685f772", list.get(0).get(md5Key));
+        assertEquals("ccdf3882e7e4c2454e28884db9b0a54d", list.get(6).get(md5Key));
+        assertEquals("a869bf6432ebd14e19fc79416274e0c9", list.get(7).get(md5Key));
+    }
+
+    /**
+     * Parses a test document through a RecursiveParserWrapper and returns the wrapper's
+     * Metadata list. The document is "/test-documents/" plus metadata's RESOURCE_NAME_KEY
+     * value, or test_recursive_embedded.docx when that key is unset. A non-null digester
+     * wraps the AutoDetectParser in a DigestingParser.
+     */
+    private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory,
+                                       boolean catchEmbeddedExceptions,
+                                       DigestingParser.Digester digester) throws Exception {
+        Parser parser = (digester == null)
+                ? new AutoDetectParser()
+                : new DigestingParser(new AutoDetectParser(), digester);
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser,
+                contentHandlerFactory, catchEmbeddedExceptions);
+        String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+        String path = "/test-documents/" + (name == null ? "test_recursive_embedded.docx" : name);
+
+        InputStream stream = null;
+        try {
+            stream = TikaInputStream.get(RecursiveParserWrapperTest.class.getResource(path).toURI());
+            wrapper.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        return wrapper.getMetadata();
+    }
+
+    private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory)
+            throws Exception { // overload defaults: catchEmbeddedExceptions=true, no digester
+        return getMetadata(metadata, contentHandlerFactory, true, null);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/TestParsers.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/TestParsers.java b/tika-app/src/test/java/org/apache/tika/parser/TestParsers.java
new file mode 100644
index 0000000..cde3e78
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/TestParsers.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.tika.Tika;
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Junit test class for Tika {@link Parser}s.
+ */
+public class TestParsers extends TikaTest {
+
+    private TikaConfig tc;
+
+    private Tika tika;
+
+    @Before
+    public void setUp() throws Exception {
+        tc = TikaConfig.getDefaultConfig();
+        tika = new Tika(tc);
+    }
+
+    @Test
+    public void testWORDExtraction() throws Exception {
+
+        Path tmpFile = getTestDocumentAsTempFile("testWORD.doc");
+        Parser parser = tika.getParser();
+        Metadata metadata = new Metadata();
+        try (InputStream stream = Files.newInputStream(tmpFile)) {
+            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        } finally {
+            Files.delete(tmpFile);
+        }
+        assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
+    }
+
+    @Test
+    public void testEXCELExtraction() throws Exception {
+        final String expected = "Numbers and their Squares";
+        Path tmpFile = getTestDocumentAsTempFile("testEXCEL.xls");
+        try {
+            String s1 = tika.parseToString(tmpFile);
+            assertTrue("Text does not contain '" + expected + "'", s1
+                    .contains(expected));
+            Parser parser = tika.getParser();
+            Metadata metadata = new Metadata();
+            try (InputStream stream = Files.newInputStream(tmpFile)) {
+                parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+            }
+            assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
+        } finally {
+            Files.delete(tmpFile);
+        }
+    }
+
+    @Test
+    public void testOptionalHyphen() throws Exception {
+        String[] extensions =
+                new String[] { "ppt", "pptx", "doc", "docx", "rtf", "pdf"};
+        for (String extension : extensions) {
+            Path tmpFile = getTestDocumentAsTempFile("testOptionalHyphen." + extension);
+            String content = null;
+            try {
+                content = tika.parseToString(tmpFile);
+            } finally {
+                Files.delete(tmpFile);
+            }
+            assertTrue("optional hyphen was not handled for '" + extension + "' file type: " + content,
+                       content.contains("optionalhyphen") ||
+                       content.contains("optional\u00adhyphen") ||   // soft hyphen
+                       content.contains("optional\u200bhyphen") ||   // zero width space
+                       content.contains("optional\u2027"));          // hyphenation point
+            
+        }
+    }
+
+    @Test
+    public void testComment() throws Exception {
+        final String[] extensions = new String[] {"ppt", "pptx", "doc", 
+            "docx", "xls", "xlsx", "pdf", "rtf"};
+        for(String extension : extensions) {
+            verifyComment(extension, "testComment");
+        }
+    }
+
+    private void verifyComment(String extension, String fileName) throws Exception {
+        // Extracts the text of fileName.extension via the temp file returned by
+        // getTestDocumentAsTempFile (always deleted afterwards) and checks the content.
+        String content = null;
+        Path tmpFile = null;
+        try {
+            tmpFile = getTestDocumentAsTempFile(fileName + "." + extension);
+            content = tika.parseToString(tmpFile);
+        } finally {
+            if (tmpFile != null) {
+                Files.delete(tmpFile);
+            }
+        }
+        assertTrue(extension + ": content=" + content + " did not extract text",
+                content.contains("Here is some text"));
+        assertTrue(extension + ": content=" + content + " did not extract comment",
+                content.contains("Here is a comment"));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-app/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
new file mode 100644
index 0000000..54c1427
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.fork;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.NotSerializableException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fork.ForkParser;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Test that the ForkParser correctly behaves when
+ *  wired in to the regular Parsers and their test data
+ */
+public class ForkParserIntegrationTest {
+
+    private Tika tika = new Tika(); // TODO Use TikaConfig instead, when it works
+
+    /**
+     * Simple text parsing
+     */
+    @Test
+    public void testForkedTextParsing() throws Exception {
+        ForkParser parser = new ForkParser(
+                ForkParserIntegrationTest.class.getClassLoader(),
+                tika.getParser());
+
+       // try-with-resources so the test document stream is closed as well as the parser
+       try (InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
+               "/test-documents/testTXT.txt")) {
+          ContentHandler output = new BodyContentHandler();
+          ParseContext context = new ParseContext();
+          parser.parse(stream, output, new Metadata(), context);
+
+          String content = output.toString();
+          assertContains("Test d'indexation", content);
+          assertContains("http://www.apache.org", content);
+       } finally {
+          parser.close();
+       }
+    }
+   
+    /**
+     * An Error that carries a message and compares equal by that message, so that
+     * an instance can be matched against the serialized version of itself.
+     */
+    static class AnError extends Error {
+        private static final long serialVersionUID = -6197267350768803348L;
+        private String message;
+        AnError(String message) {
+            super(message);
+            this.message = message;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            // only an object of exactly this class can be equal; then compare messages
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            return message.equals(((AnError) o).message);
+        }
+
+        @Override
+        public int hashCode() {
+            return message.hashCode();
+        }
+    }
+    
+    /**
+     * This error isn't serializable on the server, so can't be sent back to the Fork Client
+     *  once it has occurred: writeObject throws "Bang!" unless ForkParser is on the call stack
+     */
+    static class WontBeSerializedError extends RuntimeException {
+       private static final long serialVersionUID = 1L;
+
+       WontBeSerializedError(String message) {
+          super(message);
+       }
+
+       private void writeObject(java.io.ObjectOutputStream out) {
+          RuntimeException e = new RuntimeException("Bang!");
+          boolean found = false;
+          for (StackTraceElement ste : e.getStackTrace()) {
+             if (ste.getClassName().equals(ForkParser.class.getName())) {
+                found = true;
+                break;
+             }
+          }
+          if (!found) {
+             throw e;
+          }
+       }
+    }
+    
+    static class BrokenParser implements Parser {
+        private static final long serialVersionUID = 995871497930817839L;
+        public Error err = new AnError("Simulated fail");
+        public RuntimeException re = null;
+        
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            return new HashSet<MediaType>(Arrays.asList(MediaType.TEXT_PLAIN));
+        }
+
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+            if (re != null) throw re;
+            throw err;
+        }
+    }
+    
+    /**
+     * TIKA-831 Parsers throwing errors should be caught and
+     *  properly reported
+     */
+    @Test
+    public void testParsingErrorInForkedParserShouldBeReported() throws Exception {
+        BrokenParser brokenParser = new BrokenParser();
+        Parser parser = new ForkParser(ForkParser.class.getClassLoader(), brokenParser);
+        InputStream stream = getClass().getResourceAsStream("/test-documents/testTXT.txt");
+        
+        // With a serializable error, we'll get that back
+        try {
+            ContentHandler output = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            parser.parse(stream, output, new Metadata(), context);
+            fail("Expected TikaException caused by Error");
+        } catch (TikaException e) {
+            assertEquals(brokenParser.err, e.getCause());
+        }
+        
+        // With a non serializable one, we'll get something else
+        // TODO Fix this test
+        brokenParser = new BrokenParser();
+        brokenParser.re= new WontBeSerializedError("Can't Serialize");
+        parser = new ForkParser(ForkParser.class.getClassLoader(), brokenParser);
+//        try {
+//           ContentHandler output = new BodyContentHandler();
+//           ParseContext context = new ParseContext();
+//           parser.parse(stream, output, new Metadata(), context);
+//           fail("Expected TikaException caused by Error");
+//       } catch (TikaException e) {
+//           assertEquals(TikaException.class, e.getCause().getClass());
+//           assertEquals("Bang!", e.getCause().getMessage());
+//       }
+    }
+    
+    /**
+     * If we supply a non serializable object on the ParseContext,
+     *  check we get a helpful exception back
+     */
+    @Test
+    public void testParserHandlingOfNonSerializable() throws Exception {
+       ForkParser parser = new ForkParser(
+             ForkParserIntegrationTest.class.getClassLoader(),
+             tika.getParser());
+
+       ParseContext context = new ParseContext();
+       context.set(Detector.class, new Detector() {
+          public MediaType detect(InputStream input, Metadata metadata) {
+             return MediaType.OCTET_STREAM;
+          }
+       });
+
+       // try-with-resources so the stream is closed even though parse() is expected to fail
+       try (InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
+               "/test-documents/testTXT.txt")) {
+          ContentHandler output = new BodyContentHandler();
+          parser.parse(stream, output, new Metadata(), context);
+          fail("Should have blown up with a non serializable ParseContext");
+       } catch(TikaException e) {
+          // Check the right details
+          assertNotNull(e.getCause());
+          assertEquals(NotSerializableException.class, e.getCause().getClass());
+          assertEquals("Unable to serialize ParseContext to pass to the Forked Parser", e.getMessage());
+       } finally {
+          parser.close();
+       }
+    }
+
+    /**
+     * TIKA-832
+     */
+    @Test
+    public void testAttachingADebuggerOnTheForkedParserShouldWork()
+            throws Exception {
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, tika.getParser());
+
+        ForkParser parser = new ForkParser(
+                ForkParserIntegrationTest.class.getClassLoader(),
+                tika.getParser());
+        parser.setJavaCommand(Arrays.asList("java", "-Xmx32m", "-Xdebug",
+                                            "-Xrunjdwp:transport=dt_socket,address=54321,server=y,suspend=n"));
+        // try-with-resources so the test document stream is closed as well as the parser
+        try (InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
+                "/test-documents/testTXT.txt")) {
+            ContentHandler body = new BodyContentHandler();
+            parser.parse(stream, body, new Metadata(), context);
+            String content = body.toString();
+            assertContains("Test d'indexation", content);
+            assertContains("http://www.apache.org", content);
+        } finally {
+            parser.close();
+        }
+    }
+
+    /**
+     * TIKA-808 - Ensure that parsing of our test PDFs work under
+     * the Fork Parser, to ensure that complex parsing behaves
+     */
+    @Test
+    public void testForkedPDFParsing() throws Exception {
+        ForkParser parser = new ForkParser(
+                ForkParserIntegrationTest.class.getClassLoader(),
+                tika.getParser());
+        // try-with-resources so the test document stream is closed as well as the parser
+        try (InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
+                "/test-documents/testPDF.pdf")) {
+            ContentHandler output = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+            parser.parse(stream, output, new Metadata(), context);
+
+            String content = output.toString();
+            assertContains("Apache Tika", content);
+            assertContains("Tika - Content Analysis Toolkit", content);
+            assertContains("incubator", content);
+            assertContains("Apache Software Foundation", content);
+        } finally {
+            parser.close();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
new file mode 100644
index 0000000..52af12b
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
@@ -0,0 +1,251 @@
+package org.apache.tika.parser.mock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Date;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+
+/**
+ * Somewhat bizarrely, we can't put the test of this test resource in tika-test-resources
+ * or else it will be called by every module that uses it.  Um, Yossarian!!!
+ */
+public class MockParserTest extends TikaTest {
+    private final static String M = "/test-documents/mock/";
+    private final static Parser PARSER = new AutoDetectParser();
+
+    @Override
+    public XMLResult getXML(String path, Metadata m) throws Exception {
+        //note that this is specific to MockParserTest with addition of M to the path!
+        InputStream is = getResourceAsStream(M+path);
+        try {
+            return super.getXML(is, PARSER, m);
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+    @Test
+    public void testExample() throws Exception {
+        Metadata m = new Metadata();
+        PrintStream out = System.out;
+        PrintStream err = System.err;
+        ByteArrayOutputStream outBos = new ByteArrayOutputStream();
+        ByteArrayOutputStream errBos = new ByteArrayOutputStream();
+        PrintStream tmpOut = new PrintStream(outBos, true, UTF_8.toString());
+        PrintStream tmpErr = new PrintStream(errBos, true, UTF_8.toString());
+        System.setOut(tmpOut);
+        System.setErr(tmpErr);
+        try {
+            assertThrowable("example.xml", m, IOException.class, "not another IOException");
+            assertMockParser(m);
+        } finally {
+            System.setOut(out);
+            System.setErr(err);
+        }
+        String outString = new String(outBos.toByteArray(), UTF_8);
+        assertContains("writing to System.out", outString);
+
+        String errString = new String(errBos.toByteArray(), UTF_8);
+        assertContains("writing to System.err", errString);
+
+    }
+
+    @Test
+    public void testNothingBad() throws Exception {
+        Metadata m = new Metadata();
+        String content = getXML("nothing_bad.xml", m).xml;
+        assertEquals("Geoffrey Chaucer", m.get("author"));
+        assertContains("<p>And bathed every veyne in swich licour,</p>", content);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testNullPointer() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("null_pointer.xml", m, NullPointerException.class, "null pointer message");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testNullPointerNoMsg() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("null_pointer_no_msg.xml", m, NullPointerException.class, null);
+        assertMockParser(m);
+    }
+
+
+    @Test
+    public void testSleep() throws Exception {
+        long start = System.currentTimeMillis();
+        Metadata m = new Metadata();
+        getXML("sleep.xml", m);
+        long elapsed = System.currentTimeMillis()-start;
+        //the original note says the parse should sleep for at least 3000 millis;
+        //only 2000 is asserted, presumably to leave some slack -- TODO confirm
+        boolean enoughTimeHasElapsed = elapsed > 2000;
+        assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testHeavyHang() throws Exception {
+        long start = System.currentTimeMillis();
+        Metadata m = new Metadata();
+
+        getXML("heavy_hang.xml", m);
+        long elapsed = System.currentTimeMillis()-start;
+        //the original note says the parse should take at least 3000 millis;
+        //only 2000 is asserted, presumably to leave some slack -- TODO confirm
+        boolean enoughTimeHasElapsed = elapsed > 2000;
+        assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testFakeOOM() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not another oom");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testRealOOM() throws Exception {
+        //Note: we're not actually testing the diff between fake and real oom
+        //i.e. by creating child process and setting different -Xmx or
+        //memory profiling.
+        Metadata m = new Metadata();
+        assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap space");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testInterruptibleSleep() {
+        //Without static initialization of the parser, it can take ~1 second after t.start()
+        //before the parser actually calls parse.  This is
+        //just the time it takes to instantiate and call AutoDetectParser, do the detection, etc.
+        //This is not thread creation overhead.
+        ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
+        Thread t = new Thread(r);
+        t.start();
+        long start = new Date().getTime();
+        try {
+            Thread.sleep(1000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+
+        t.interrupt();
+
+        try {
+            t.join(10000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        long elapsed = new Date().getTime()-start;
+        boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
+        assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
+    }
+
+    @Test
+    public void testNonInterruptibleSleep() {
+        ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
+        Thread t = new Thread(r);
+        t.start();
+        long start = new Date().getTime();
+        try {
+            //make sure that the thread has actually started
+            Thread.sleep(1000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        t.interrupt();
+        try {
+            t.join(20000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        long elapsed = new Date().getTime()-start;
+        boolean longEnough = elapsed > 3000;//the xml file specifies 3000, this sleeps 1000
+        assertTrue("elapsed ("+elapsed+" millis) was not long enough", longEnough);
+    }
+
+    private class ParserRunnable implements Runnable {
+        private final String path;
+        ParserRunnable(String path) {
+            this.path = path;
+        }
+        @Override
+        public void run() {
+            Metadata m = new Metadata();
+            try {
+                getXML(path, m);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            } finally {
+                assertMockParser(m);
+            }
+        }
+    }
+
+    private void assertThrowable(String path, Metadata m, Class<? extends Throwable> expected, String message) {
+        Throwable thrown = null;
+        try {
+            getXML(path, m);
+        } catch (Throwable t) {
+            //if this is a throwable wrapped in a TikaException, use the cause
+            thrown = (t instanceof TikaException && t.getCause() != null) ? t.getCause() : t;
+        }
+        //checked outside the try so that the catch block cannot swallow a failure
+        if (thrown == null) {
+            fail("expected " + expected + " but nothing was thrown for " + path);
+        } else if (!expected.isAssignableFrom(thrown.getClass())) {
+            fail(thrown.getClass() + " is not assignable to " + expected);
+        } else if (message != null) {
+            assertEquals(message, thrown.getMessage());
+        }
+    }
+
+    private void assertMockParser(Metadata m) {
+        //make sure that it was actually parsed by mock:
+        //scan the X-Parsed-By values and stop at the first
+        //entry that names the MockParser class
+        boolean parsedByMock = false;
+        String[] parsedBy = m.getValues("X-Parsed-By");
+        for (int i = 0; i < parsedBy.length && !parsedByMock; i++) {
+            parsedByMock =
+                    parsedBy[i].equals("org.apache.tika.parser.mock.MockParser");
+        }
+        assertTrue("mock parser should have been called", parsedByMock);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/pkg/PackageTest.java b/tika-app/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
new file mode 100644
index 0000000..c47a348
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
@@ -0,0 +1,335 @@
+package org.apache.tika.parser.pkg;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class PackageTest extends TikaTest {
+
+    private static final MediaType TYPE_7ZIP = MediaType.application("x-7z-compressed");
+    
+    private ParseContext recursingContext;
+    private Parser autoDetectParser;
+    
+    @Before
+    public void setUp() throws Exception {
+       
+       autoDetectParser = new AutoDetectParser();
+       recursingContext = new ParseContext();
+       recursingContext.set(Parser.class, autoDetectParser);
+    }
+    
+    @Test
+    public void testZlibParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/testTXT.zlib")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("application/zlib", metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains("Test d'indexation de Txt", content);
+        assertContains("http://www.apache.org", content);
+    }
+    
+    
+    @Test
+    public void testArParsing() throws Exception {
+        Parser parser = new AutoDetectParser();
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/testARofText.ar")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("application/x-archive",
+                metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains("testTXT.txt", content);
+        assertContains("Test d'indexation de Txt", content);
+        assertContains("http://www.apache.org", content);
+        // fresh handler and metadata, so the second archive is checked on its own output
+        handler = new BodyContentHandler();
+        metadata = new Metadata();
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/testARofSND.ar")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+        assertEquals("application/x-archive",
+                     metadata.get(Metadata.CONTENT_TYPE));
+        content = handler.toString();
+        assertContains("testAU.au", content);
+    }
+    
+    @Test
+    public void testBzip2Parsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.tbz2")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains("test-documents/testEXCEL.xls", content);
+        assertContains("Sample Excel Worksheet", content);
+        assertContains("test-documents/testHTML.html", content);
+        assertContains("Test Indexation Html", content);
+        assertContains("test-documents/testOpenOffice2.odt", content);
+        assertContains("This is a sample Open Office document", content);
+        assertContains("test-documents/testPDF.pdf", content);
+        assertContains("Apache Tika", content);
+        assertContains("test-documents/testPPT.ppt", content);
+        assertContains("Sample Powerpoint Slide", content);
+        assertContains("test-documents/testRTF.rtf", content);
+        assertContains("indexation Word", content);
+        assertContains("test-documents/testTXT.txt", content);
+        assertContains("Test d'indexation de Txt", content);
+        assertContains("test-documents/testWORD.doc", content);
+        assertContains("This is a sample Microsoft Word Document", content);
+        assertContains("test-documents/testXML.xml", content);
+        assertContains("Rida Benjelloun", content);
+    }
+    
+    @Test
+    public void testCompressParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        // try-with-resources, as in the sibling tests: the stream is always closed,
+        // and a missing test resource is no longer masked by a NullPointerException
+        // thrown from stream.close() in a finally block
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.tar.Z")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals("application/x-compress", metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains("test-documents/testEXCEL.xls", content);
+        assertContains("Sample Excel Worksheet", content);
+        assertContains("test-documents/testHTML.html", content);
+        assertContains("Test Indexation Html", content);
+        assertContains("test-documents/testOpenOffice2.odt", content);
+        assertContains("This is a sample Open Office document", content);
+        assertContains("test-documents/testPDF.pdf", content);
+        assertContains("Apache Tika", content);
+        assertContains("test-documents/testPPT.ppt", content);
+        assertContains("Sample Powerpoint Slide", content);
+        assertContains("test-documents/testRTF.rtf", content);
+        assertContains("indexation Word", content);
+        assertContains("test-documents/testTXT.txt", content);
+        assertContains("Test d'indexation de Txt", content);
+        assertContains("test-documents/testWORD.doc", content);
+        assertContains("This is a sample Microsoft Word Document", content);
+        assertContains("test-documents/testXML.xml", content);
+        assertContains("Rida Benjelloun", content);
+    }
+    
+    /**
+     * Parses test-documents.tgz with an auto-detecting parser and checks the
+     * reported content type plus the expected entry names and text fragments.
+     */
+    @Test
+    public void testGzipParsing() throws Exception {
+        Parser autoDetect = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Metadata meta = new Metadata();
+
+        try (InputStream in = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.tgz")) {
+            autoDetect.parse(in, bodyHandler, meta, recursingContext);
+        }
+
+        assertEquals("application/gzip", meta.get(Metadata.CONTENT_TYPE));
+
+        // entry name followed by a fragment of that entry's text, checked in order
+        final String[] expected = {
+                "test-documents/testEXCEL.xls", "Sample Excel Worksheet",
+                "test-documents/testHTML.html", "Test Indexation Html",
+                "test-documents/testOpenOffice2.odt", "This is a sample Open Office document",
+                "test-documents/testPDF.pdf", "Apache Tika",
+                "test-documents/testPPT.ppt", "Sample Powerpoint Slide",
+                "test-documents/testRTF.rtf", "indexation Word",
+                "test-documents/testTXT.txt", "Test d'indexation de Txt",
+                "test-documents/testWORD.doc", "This is a sample Microsoft Word Document",
+                "test-documents/testXML.xml", "Rida Benjelloun"
+        };
+        String text = bodyHandler.toString();
+        for (String fragment : expected) {
+            assertContains(fragment, text);
+        }
+    }
+    
+    /**
+     * Parses test-documents.rar with an auto-detecting parser and checks the
+     * reported content type plus the expected entry names and text fragments.
+     */
+    @Test
+    public void testRarParsing() throws Exception {
+        Parser autoDetect = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Metadata meta = new Metadata();
+
+        try (InputStream in = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.rar")) {
+            autoDetect.parse(in, bodyHandler, meta, recursingContext);
+        }
+
+        assertEquals("application/x-rar-compressed", meta.get(Metadata.CONTENT_TYPE));
+
+        // entry name followed by a fragment of that entry's text, checked in order
+        final String[] expected = {
+                "test-documents/testEXCEL.xls", "Sample Excel Worksheet",
+                "test-documents/testHTML.html", "Test Indexation Html",
+                "test-documents/testOpenOffice2.odt", "This is a sample Open Office document",
+                "test-documents/testPDF.pdf", "Apache Tika",
+                "test-documents/testPPT.ppt", "Sample Powerpoint Slide",
+                "test-documents/testRTF.rtf", "indexation Word",
+                "test-documents/testTXT.txt", "Test d'indexation de Txt",
+                "test-documents/testWORD.doc", "This is a sample Microsoft Word Document",
+                "test-documents/testXML.xml", "Rida Benjelloun"
+        };
+        String text = bodyHandler.toString();
+        for (String fragment : expected) {
+            assertContains(fragment, text);
+        }
+    }
+    
+    @Test
+    public void test7ZParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        
+        // Ensure 7zip is a parsable format
+        assertTrue("No 7zip parser found", 
+                parser.getSupportedTypes(recursingContext).contains(TYPE_7ZIP));
+        
+        // Parse
+        try (InputStream stream = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.7z")) {
+            parser.parse(stream, handler, metadata, recursingContext);
+        }
+
+        assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains("test-documents/testEXCEL.xls", content);
+        assertContains("Sample Excel Worksheet", content);
+        assertContains("test-documents/testHTML.html", content);
+        assertContains("Test Indexation Html", content);
+        assertContains("test-documents/testOpenOffice2.odt", content);
+        assertContains("This is a sample Open Office document", content);
+        assertContains("test-documents/testPDF.pdf", content);
+        assertContains("Apache Tika", content);
+        assertContains("test-documents/testPPT.ppt", content);
+        assertContains("Sample Powerpoint Slide", content);
+        assertContains("test-documents/testRTF.rtf", content);
+        assertContains("indexation Word", content);
+        assertContains("test-documents/testTXT.txt", content);
+        assertContains("Test d'indexation de Txt", content);
+        assertContains("test-documents/testWORD.doc", content);
+        assertContains("This is a sample Microsoft Word Document", content);
+        assertContains("test-documents/testXML.xml", content);
+        assertContains("Rida Benjelloun", content);
+    }
+    /**
+     * Parses test-documents.tar with an auto-detecting parser and checks the
+     * reported content type plus the expected entry names and text fragments.
+     */
+    @Test
+    public void testTarParsing() throws Exception {
+        Parser autoDetect = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Metadata meta = new Metadata();
+
+        try (InputStream in = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.tar")) {
+            autoDetect.parse(in, bodyHandler, meta, recursingContext);
+        }
+
+        assertEquals("application/x-tar", meta.get(Metadata.CONTENT_TYPE));
+
+        // entry name followed by a fragment of that entry's text, checked in order
+        final String[] expected = {
+                "test-documents/testEXCEL.xls", "Sample Excel Worksheet",
+                "test-documents/testHTML.html", "Test Indexation Html",
+                "test-documents/testOpenOffice2.odt", "This is a sample Open Office document",
+                "test-documents/testPDF.pdf", "Apache Tika",
+                "test-documents/testPPT.ppt", "Sample Powerpoint Slide",
+                "test-documents/testRTF.rtf", "indexation Word",
+                "test-documents/testTXT.txt", "Test d'indexation de Txt",
+                "test-documents/testWORD.doc", "This is a sample Microsoft Word Document",
+                "test-documents/testXML.xml", "Rida Benjelloun"
+        };
+        String text = bodyHandler.toString();
+        for (String fragment : expected) {
+            assertContains(fragment, text);
+        }
+    }
+    
+    /**
+     * Parses test-documents.zip with an auto-detecting parser and checks the
+     * reported content type plus the expected file names and text fragments.
+     */
+    @Test
+    public void testZipParsing() throws Exception {
+        Parser autoDetect = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Metadata meta = new Metadata();
+
+        try (InputStream in = PackageTest.class.getResourceAsStream(
+                "/test-documents/test-documents.zip")) {
+            autoDetect.parse(in, bodyHandler, meta, recursingContext);
+        }
+
+        assertEquals("application/zip", meta.get(Metadata.CONTENT_TYPE));
+
+        // file name followed by a fragment of that file's text, checked in order;
+        // unlike the other archive tests, names are matched without a directory prefix
+        final String[] expected = {
+                "testEXCEL.xls", "Sample Excel Worksheet",
+                "testHTML.html", "Test Indexation Html",
+                "testOpenOffice2.odt", "This is a sample Open Office document",
+                "testPDF.pdf", "Apache Tika",
+                "testPPT.ppt", "Sample Powerpoint Slide",
+                "testRTF.rtf", "indexation Word",
+                "testTXT.txt", "Test d'indexation de Txt",
+                "testWORD.doc", "This is a sample Microsoft Word Document",
+                "testXML.xml", "Rida Benjelloun"
+        };
+        String text = bodyHandler.toString();
+        for (String fragment : expected) {
+            assertContains(fragment, text);
+        }
+    }
+    
+    /**
+     * Parses testSVG.svgz with an auto-detecting parser and checks that the
+     * content type is reported as gzip and the SVG's text is extracted.
+     */
+    @Test
+    public void testSvgzParsing() throws Exception {
+        Parser autoDetect = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Metadata meta = new Metadata();
+
+        try (InputStream in = PackageTest.class.getResourceAsStream(
+                "/test-documents/testSVG.svgz")) {
+            autoDetect.parse(in, bodyHandler, meta, recursingContext);
+        }
+
+        String detectedType = meta.get(Metadata.CONTENT_TYPE);
+        assertEquals("application/gzip", detectedType);
+        String text = bodyHandler.toString();
+        assertContains("Test SVG image", text);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java b/tika-app/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
new file mode 100644
index 0000000..eff076b
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.sax;
+
+import static org.apache.tika.TikaTest.assertContains;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+
+/**
+ * Test class for the {@link PhoneExtractingContentHandler}
+ * class. This demonstrates how to parse a document and retrieve any phone numbers
+ * found within.
+ *
+ * The phone numbers are added to a multivalued Metadata object under the key "phonenumbers".
+ * You can get an array of phone numbers by calling metadata.getValues("phonenumbers").
+ */
+public class PhoneExtractingContentHandlerTest {
+    @Test
+    public void testExtractPhoneNumbers() throws Exception {
+        Parser parser = new AutoDetectParser();
+        Metadata metadata = new Metadata();
+        // The PhoneExtractingContentHandler will examine any characters for phone numbers before passing them
+        // to the underlying Handler.
+        BodyContentHandler delegate = new BodyContentHandler();
+        PhoneExtractingContentHandler handler = new PhoneExtractingContentHandler(delegate, metadata);
+        try (InputStream in = PhoneExtractingContentHandlerTest.class.getResourceAsStream(
+                "/test-documents/testPhoneNumberExtractor.odt")) {
+            parser.parse(in, handler, metadata, new ParseContext());
+        }
+
+        // expected numbers, position by position in the "phonenumbers" values
+        final String[] expectedNumbers = {
+                "9498888888",
+                "9497777777",
+                "9496666666",
+                "9495555555",
+                "4193404645",
+                "9044687081",
+                "2604094811"
+        };
+        String[] phoneNumbers = metadata.getValues("phonenumbers");
+        for (int idx = 0; idx < expectedNumbers.length; idx++) {
+            assertContains(expectedNumbers[idx], phoneNumbers[idx]);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java b/tika-app/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
new file mode 100644
index 0000000..62660c8
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.utils;
+
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+
+/**
+ * Checks the relative order of selected parsers in the list returned by
+ * {@link DefaultParser#getAllComponentParsers()}.
+ */
+public class ServiceLoaderUtilsTest extends TikaTest {
+    @Test
+    public void testOrdering() throws Exception {
+        //make sure that non Tika parsers come last
+        //which means that they'll overwrite Tika parsers and
+        //be preferred.
+        DefaultParser defaultParser = new DefaultParser();
+        int vorbisIndex = -1;
+        int fictIndex = -1;
+        int dcxmlIndex = -1;
+
+        // record the position of each parser of interest; -1 means "not seen"
+        int position = 0;
+        for (Parser candidate : defaultParser.getAllComponentParsers()) {
+            String described = candidate.getClass().toString();
+            if ("class org.gagravarr.tika.VorbisParser".equals(described)) {
+                vorbisIndex = position;
+            } else if ("class org.apache.tika.parser.xml.FictionBookParser".equals(described)) {
+                fictIndex = position;
+            } else if ("class org.apache.tika.parser.xml.DcXMLParser".equals(described)) {
+                dcxmlIndex = position;
+            }
+            position++;
+        }
+
+        assertNotEquals(vorbisIndex, fictIndex);
+        assertNotEquals(fictIndex, dcxmlIndex);
+        assertTrue(vorbisIndex > fictIndex);
+        assertTrue(fictIndex > dcxmlIndex);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/pom.xml
----------------------------------------------------------------------
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index e63f101..2c61616 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -33,8 +33,17 @@
   <packaging>bundle</packaging>
   <name>Apache Tika core</name>
   <url>http://tika.apache.org/</url>
+  <properties>
+    <!-- NOTE: sync codec version with POI -->
+    <codec.version>1.10</codec.version>
+  </properties>
 
   <dependencies>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>${codec.version}</version>
+    </dependency>
     <!-- Optional OSGi dependencies, used only when running within OSGi -->
     <dependency>
       <groupId>org.osgi</groupId>
@@ -60,6 +69,13 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-test-resources</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
      <dependency>
       <groupId>org.ops4j.pax.exam</groupId>
       <artifactId>pax-exam-junit4</artifactId>
@@ -108,6 +124,9 @@
             <Bundle-DocURL>${project.url}</Bundle-DocURL>
             <Bundle-Activator>org.apache.tika.config.TikaActivator</Bundle-Activator>
             <Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy>
+            <Embed-Dependency>
+              commons-codec
+            </Embed-Dependency>
           </instructions>
         </configuration>
       </plugin>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/main/java/org/apache/tika/parser/digesting/CommonsDigester.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/digesting/CommonsDigester.java b/tika-core/src/main/java/org/apache/tika/parser/digesting/CommonsDigester.java
new file mode 100644
index 0000000..e7b2405
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/digesting/CommonsDigester.java
@@ -0,0 +1,295 @@
+package org.apache.tika.parser.digesting;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Implementation of {@link org.apache.tika.parser.DigestingParser.Digester}
+ * that relies on commons.codec.digest.DigestUtils to calculate digest hashes.
+ * <p>
+ * This digester tries to use the regular mark/reset protocol on the InputStream.
+ * However, this wraps an internal BoundedInputStream, and if the InputStream
+ * is not fully read, then this will reset the stream and
+ * spool the InputStream to disk (via TikaInputStream) and then digest the file.
+ * <p>
+ * If a TikaInputStream is passed in and it has an underlying file that is longer
+ * than the {@link #markLimit}, then this digester digests the file directly.
+ *
+ */
+public class CommonsDigester implements DigestingParser.Digester {
+
+    public enum DigestAlgorithm {
+        //those currently available in commons.digest
+        MD2,
+        MD5,
+        SHA1,
+        SHA256,
+        SHA384,
+        SHA512;
+
+        String getMetadataKey() {
+            return TikaCoreProperties.TIKA_META_PREFIX+
+                    "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER+this.toString();
+        }
+    }
+
+    private final List<DigestAlgorithm> algorithms = new ArrayList<DigestAlgorithm>();
+    private final int markLimit;
+
+    /**
+     * @param markLimit upper bound, in bytes, used for the mark/reset pass
+     *                  over the stream; must not be negative
+     * @param algorithms digests to compute, in the order given
+     * @throws IllegalArgumentException if markLimit is negative
+     */
+    public CommonsDigester(int markLimit, DigestAlgorithm... algorithms) {
+        for (DigestAlgorithm requested : algorithms) {
+            this.algorithms.add(requested);
+        }
+        if (markLimit < 0) {
+            throw new IllegalArgumentException("markLimit must be >= 0");
+        }
+        this.markLimit = markLimit;
+    }
+
+    /**
+     * Computes every configured digest for the stream and stores each one in
+     * the metadata.
+     * <p>
+     * A stream that is already backed by a file longer than the mark limit is
+     * digested straight from that file. Otherwise each algorithm reads the
+     * stream through a bounded wrapper using mark/reset; as soon as one pass
+     * hits the bound, the file obtained from the TikaInputStream is digested
+     * instead.
+     *
+     * @param is stream to digest
+     * @param m metadata that receives the digests
+     * @param parseContext not used by this implementation
+     * @throws IOException if reading or resetting the stream fails
+     */
+    @Override
+    public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
+        if (algorithms.isEmpty()) {
+            //nothing to compute; without this guard the code below would
+            //fall through to getFile() and spool the stream for no reason
+            return;
+        }
+        //NOTE(review): when "is" is not already a TikaInputStream, the wrapper
+        //created here is never closed in this method -- confirm who cleans up
+        //any temp file it creates
+        TikaInputStream tis = TikaInputStream.get(is);
+        long sz = -1;
+        if (tis.hasFile()) {
+            sz = tis.getLength();
+        }
+        //if the file is definitely a file,
+        //and its size is greater than its mark limit,
+        //just digest the underlying file.
+        if (sz > markLimit) {
+            digestFile(tis.getFile(), m);
+            return;
+        }
+
+        //try the usual mark/reset stuff.
+        //however, if you actually hit the bound,
+        //then stop and spool to file via TikaInputStream
+        SimpleBoundedInputStream bis = new SimpleBoundedInputStream(markLimit, tis);
+        boolean finishedStream = false;
+        for (DigestAlgorithm algorithm : algorithms) {
+            //one byte more than the bound, so the mark is still valid
+            //after a pass that read up to the bound
+            bis.mark(markLimit + 1);
+            finishedStream = digestEach(algorithm, bis, m);
+            bis.reset();
+            if (!finishedStream) {
+                break;
+            }
+        }
+        if (!finishedStream) {
+            digestFile(tis.getFile(), m);
+        }
+    }
+
+    /**
+     * Digests the file once per configured algorithm, opening a fresh
+     * stream on the file for every pass.
+     *
+     * @param f file to digest
+     * @param m metadata that receives the digests
+     * @throws IOException if the file cannot be opened or closed
+     */
+    private void digestFile(File f, Metadata m) throws IOException {
+        for (int idx = 0; idx < algorithms.size(); idx++) {
+            DigestAlgorithm current = algorithms.get(idx);
+            try (InputStream fileStream = new FileInputStream(f)) {
+                digestEach(current, fileStream, m);
+            }
+        }
+    }
+
+    /**
+     *
+     * @param algorithm algo to use
+     * @param is input stream to read from
+     * @param metadata metadata for reporting the digest
+     * @return whether or not this finished the input stream
+     * @throws IOException
+     */
+    private boolean digestEach(DigestAlgorithm algorithm,
+                            InputStream is, Metadata metadata) throws IOException {
+        String digest = null;
+        try {
+            switch (algorithm) {
+                case MD2:
+                    digest = DigestUtils.md2Hex(is);
+                    break;
+                case MD5:
+                    digest = DigestUtils.md5Hex(is);
+                    break;
+                case SHA1:
+                    digest = DigestUtils.sha1Hex(is);
+                    break;
+                case SHA256:
+                    digest = DigestUtils.sha256Hex(is);
+                    break;
+                case SHA384:
+                    digest = DigestUtils.sha384Hex(is);
+                    break;
+                case SHA512:
+                    digest = DigestUtils.sha512Hex(is);
+                    break;
+                default:
+                    throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algorithm.toString());
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            //swallow, or should we throw this?
+        }
+        if (is instanceof SimpleBoundedInputStream) {
+            if (((SimpleBoundedInputStream)is).hasHitBound()) {
+                return false;
+            }
+        }
+        metadata.set(algorithm.getMetadataKey(), digest);
+        return true;
+    }
+
+    /**
+     * Converts a comma-delimited list of algorithm names into the matching
+     * {@link DigestAlgorithm} constants.
+     *
+     * @param s comma-delimited list of algorithms to use, e.g. md5,sha256;
+     *          names are matched case-insensitively and whitespace around
+     *          each name is ignored
+     * @return the algorithms in the order they were listed
+     * @throws IllegalArgumentException if s is null or contains a name that
+     *         does not match any {@link DigestAlgorithm}
+     */
+    public static DigestAlgorithm[] parse(String s) {
+        //an assert is a no-op unless the JVM runs with -ea, so check explicitly
+        if (s == null) {
+            throw new IllegalArgumentException("Algorithm list must not be null");
+        }
+
+        List<DigestAlgorithm> ret = new ArrayList<DigestAlgorithm>();
+        for (String algoString : s.split(",")) {
+            String uc = algoString.trim().toUpperCase(Locale.ROOT);
+            try {
+                //the constants do not override toString(), so looking the
+                //name up is the same as comparing against each toString()
+                ret.add(DigestAlgorithm.valueOf(uc));
+            } catch (IllegalArgumentException e) {
+                StringBuilder sb = new StringBuilder();
+                for (DigestAlgorithm algo : DigestAlgorithm.values()) {
+                    if (sb.length() > 0) {
+                        sb.append(", ");
+                    }
+                    sb.append(algo.toString());
+                }
+                //name the offending entry, not the whole list
+                throw new IllegalArgumentException("Couldn't match " + algoString + " with any of: " + sb.toString());
+            }
+        }
+        return ret.toArray(new DigestAlgorithm[ret.size()]);
+    }
+
+    /**
+     * Very slight modification of Commons' BoundedInputStream
+     * so that we can figure out if this hit the bound or not.
+     * <p>
+     * The byte count is remembered on {@link #mark(int)} and restored on
+     * {@link #reset()}, so every pass that starts from the same mark gets the
+     * same byte allowance. The bound is reported as hit by both the
+     * single-byte and the bulk read.
+     */
+    private static class SimpleBoundedInputStream extends InputStream {
+        private final static int EOF = -1;
+        private final long max;
+        private final InputStream in;
+        private long pos;
+        //value of pos at the last call to mark(); restored by reset()
+        private long markedPos;
+        boolean hitBound = false;
+
+        private SimpleBoundedInputStream(long max, InputStream in) {
+            this.max = max;
+            this.in = in;
+        }
+
+        /**
+         * Reads one byte from the delegate unless the bound has been reached.
+         * @return the byte read, or -1 at the end of the stream or at the bound
+         * @throws IOException if an I/O error occurs
+         */
+        @Override
+        public int read() throws IOException {
+            if (max >= 0 && pos >= max) {
+                hitBound = true;
+                return EOF;
+            }
+            final int result = in.read();
+            //only count bytes that were actually delivered
+            if (result != EOF) {
+                pos++;
+            }
+            return result;
+        }
+
+        /**
+         * Invokes the delegate's <code>read(byte[])</code> method.
+         * @param b the buffer to read the bytes into
+         * @return the number of bytes read or -1 if the end of stream or
+         * the limit has been reached.
+         * @throws IOException if an I/O error occurs
+         */
+        @Override
+        public int read(final byte[] b) throws IOException {
+            return this.read(b, 0, b.length);
+        }
+
+        /**
+         * Invokes the delegate's <code>read(byte[], int, int)</code> method.
+         * @param b the buffer to read the bytes into
+         * @param off The start offset
+         * @param len The number of bytes to read
+         * @return the number of bytes read or -1 if the end of stream or
+         * the limit has been reached.
+         * @throws IOException if an I/O error occurs
+         */
+        @Override
+        public int read(final byte[] b, final int off, final int len) throws IOException {
+            if (max>=0 && pos>=max) {
+                //record the hit here as well: callers that only use the
+                //bulk read would otherwise never learn about the truncation
+                hitBound = true;
+                return EOF;
+            }
+            final long maxRead = max>=0 ? Math.min(len, max-pos) : len;
+            final int bytesRead = in.read(b, off, (int)maxRead);
+
+            if (bytesRead==EOF) {
+                return EOF;
+            }
+
+            pos+=bytesRead;
+            return bytesRead;
+        }
+
+        /**
+         * Invokes the delegate's <code>skip(long)</code> method.
+         * @param n the number of bytes to skip
+         * @return the actual number of bytes skipped
+         * @throws IOException if an I/O error occurs
+         */
+        @Override
+        public long skip(final long n) throws IOException {
+            final long toSkip = max>=0 ? Math.min(n, max-pos) : n;
+            final long skippedBytes = in.skip(toSkip);
+            pos+=skippedBytes;
+            return skippedBytes;
+        }
+
+        /**
+         * Resets the delegate and rewinds the byte count to its value at the
+         * last mark, so that the next pass is bounded like the previous one.
+         * @throws IOException if the delegate cannot be reset
+         */
+        @Override
+        public void reset() throws IOException {
+            in.reset();
+            pos = markedPos;
+            hitBound = false;
+        }
+
+        /**
+         * Marks the delegate and remembers the current byte count.
+         * @param readLimit passed through to the delegate
+         */
+        @Override
+        public void mark(int readLimit) {
+            in.mark(readLimit);
+            markedPos = pos;
+        }
+
+        /**
+         * @return whether the delegate supports mark/reset
+         */
+        @Override
+        public boolean markSupported() {
+            return in.markSupported();
+        }
+
+        /**
+         * @return true if a read was refused since the last reset because the
+         * bound had been reached
+         */
+        public boolean hasHitBound() {
+            return hitBound;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/TikaTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaTest.java b/tika-core/src/test/java/org/apache/tika/TikaTest.java
index 2c6f21f..1edf91c 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaTest.java
@@ -26,6 +26,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
@@ -74,6 +77,25 @@ public abstract class TikaTest {
        }
    }
 
+
+    /**
+     * Copies test file from "test-documents" to a temp file.
+     * Consumers are responsible for deleting the temp file after use.
+     *
+     * @param name name of the document, relative to "/test-documents/"
+     * @return path of the temp file that holds the copy
+     * @throws IOException if the temp file cannot be created or the copy fails
+     */
+   public Path getTestDocumentAsTempFile(String name) throws IOException{
+       // open the resource before creating the temp file, and close it when
+       // done: Files.copy leaves the input stream open
+       try (InputStream is = getResourceAsStream("/test-documents/"+name)) {
+           Path tmp = Files.createTempFile("tika-test", "");
+           Files.copy(is, tmp, StandardCopyOption.REPLACE_EXISTING);
+           return tmp;
+       }
+   }
+
+   public InputStream getTestDocumentAsStream(String name) {
+       return TikaInputStream.get(getResourceAsStream("/test-documents/"+name));
+   }
+
    public InputStream getResourceAsStream(String name) {
        InputStream stream = this.getClass().getResourceAsStream(name);
        if (stream == null) {
@@ -106,36 +128,50 @@ public abstract class TikaTest {
         }
     }
 
+    protected XMLResult getXML(String filePath, Parser parser, Metadata metadata, ParseContext context) throws Exception {
+        return getXML(getTestDocumentAsStream(filePath), parser, metadata, context);
+    }
+
     protected XMLResult getXML(String filePath, Parser parser, Metadata metadata) throws Exception {
-        return getXML(getResourceAsStream("/test-documents/" + filePath), parser, metadata);
+        return getXML(getTestDocumentAsStream(filePath), parser, metadata);
     }
 
     protected XMLResult getXML(String filePath, Metadata metadata) throws Exception {
-        return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), metadata);
+        Parser parser = new AutoDetectParser();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, parser);
+
+        return getXML(getTestDocumentAsStream(filePath), parser, metadata, context);
+    }
+
+    protected XMLResult getXML(String filePath, Parser parser) throws Exception {
+        //send in empty parse context so that only outer parser is used
+        return getXML(getTestDocumentAsStream(filePath), parser, new Metadata(), new ParseContext());
     }
 
     protected XMLResult getXML(String filePath) throws Exception {
-        return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), new Metadata());
+        return getXML(filePath, new Metadata());
     }
 
     protected XMLResult getXML(InputStream input, Parser parser, Metadata metadata) throws Exception {
-      ParseContext context = new ParseContext();
-      context.set(Parser.class, parser);
-
-      try {
-          ContentHandler handler = new ToXMLContentHandler();
-          parser.parse(input, handler, metadata, context);
-          return new XMLResult(handler.toString(), metadata);
-      } finally {
-          input.close();
-      }
-  }
+      return getXML(input, parser, metadata, new ParseContext());
+    }
 
-    /**
-     * Basic text extraction.
-     * <p>
-     * Tries to close input stream after processing.
-     */
+    protected XMLResult getXML(InputStream input, Parser parser, Metadata metadata, ParseContext context) throws Exception {
+        try {
+            ContentHandler handler = new ToXMLContentHandler();
+            parser.parse(input, handler, metadata, context);
+            return new XMLResult(handler.toString(), metadata);
+        } finally {
+            input.close();
+        }
+    }
+
+    /**
+     * Basic text extraction.
+     * <p>
+     * Tries to close input stream after processing.
+     */
     public String getText(InputStream is, Parser parser, ParseContext context, Metadata metadata) throws Exception{
         ContentHandler handler = new BodyContentHandler(1000000);
         try {

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java b/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
index c815607..d2f3b40 100644
--- a/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
+++ b/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
@@ -22,13 +22,13 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeDetectionTest;
 import org.junit.Before;
 import org.junit.Test;
 
-public class MimeDetectionWithNNTest {
+public class MimeDetectionWithNNTest extends TikaTest {
 
 	private Detector detector;
 
@@ -88,13 +88,13 @@ public class MimeDetectionWithNNTest {
 
 	private void testUrl(String expected, String url, String file)
 			throws IOException {
-		InputStream in = MimeDetectionTest.class.getResourceAsStream(file);
+		InputStream in = getTestDocumentAsStream(file);
 		testStream(expected, url, in);
 	}
 
 	private void testFile(String expected, String filename) throws IOException {
 
-		InputStream in = MimeDetectionTest.class.getResourceAsStream(filename);
+		InputStream in = getTestDocumentAsStream(filename);
 		testStream(expected, filename, in);
 	}
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 1f986da..31df3ec 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -27,12 +27,13 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.junit.Before;
 import org.junit.Test;
 
-public class MimeDetectionTest {
+public class MimeDetectionTest extends TikaTest {
 
     private MimeTypes mimeTypes;
 
@@ -136,12 +137,12 @@ public class MimeDetectionTest {
     }
 
     private void testUrl(String expected, String url, String file) throws IOException{
-        InputStream in = getClass().getResourceAsStream(file);
+        InputStream in = getTestDocumentAsStream(file);
         testStream(expected, url, in);
     }
 
     private void testFile(String expected, String filename) throws IOException {
-        InputStream in = getClass().getResourceAsStream(filename);
+        InputStream in = getTestDocumentAsStream(filename);
         testStream(expected, filename, in);
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
index 35c75b7..415961f 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
@@ -27,11 +27,12 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.junit.Before;
 import org.junit.Test;
 
-public class ProbabilisticMimeDetectionTest {
+public class ProbabilisticMimeDetectionTest extends TikaTest {
 
     private ProbabilisticMimeDetectionSelector proDetector;
 
@@ -130,12 +131,12 @@ public class ProbabilisticMimeDetectionTest {
 
     private void testUrl(String expected, String url, String file)
             throws IOException {
-        InputStream in = getClass().getResourceAsStream(file);
+        InputStream in = getTestDocumentAsStream(file);
         testStream(expected, url, in);
     }
 
     private void testFile(String expected, String filename) throws IOException {
-        InputStream in = getClass().getResourceAsStream(filename);
+        InputStream in = getTestDocumentAsStream(filename);
         testStream(expected, filename, in);
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
index 5605300..a6dc7f3 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
@@ -29,6 +29,7 @@ import java.io.InputStream;
 import java.net.URL;
 
 import org.apache.tika.Tika;
+import org.apache.tika.TikaTest;
 import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.detect.DefaultProbDetector;
 import org.apache.tika.metadata.Metadata;
@@ -36,7 +37,7 @@ import org.apache.tika.mime.ProbabilisticMimeDetectionSelector.Builder;
 import org.junit.Before;
 import org.junit.Test;
 
-public class ProbabilisticMimeDetectionTestWithTika {
+public class ProbabilisticMimeDetectionTestWithTika extends TikaTest {
 
     private ProbabilisticMimeDetectionSelector proSelector;
     private MediaTypeRegistry registry;
@@ -151,12 +152,12 @@ public class ProbabilisticMimeDetectionTestWithTika {
 
     private void testUrl(String expected, String url, String file)
             throws IOException {
-        InputStream in = getClass().getResourceAsStream(file);
+        InputStream in = getTestDocumentAsStream(file);
         testStream(expected, url, in);
     }
 
     private void testFile(String expected, String filename) throws IOException {
-        InputStream in = getClass().getResourceAsStream(filename);
+        InputStream in = getTestDocumentAsStream(filename);
         testStream(expected, filename, in);
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/java/org/apache/tika/osgi/BundleIT.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/osgi/BundleIT.java b/tika-core/src/test/java/org/apache/tika/osgi/BundleIT.java
index f3397d9..696d5e6 100644
--- a/tika-core/src/test/java/org/apache/tika/osgi/BundleIT.java
+++ b/tika-core/src/test/java/org/apache/tika/osgi/BundleIT.java
@@ -18,27 +18,17 @@ package org.apache.tika.osgi;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertNotNull;
 import static org.ops4j.pax.exam.CoreOptions.bundle;
 import static org.ops4j.pax.exam.CoreOptions.junitBundles;
 import static org.ops4j.pax.exam.CoreOptions.options;
-import static org.ops4j.pax.exam.CoreOptions.mavenBundle;
 
 import javax.inject.Inject;
-
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringWriter;
-import java.io.Writer;
 import java.net.URISyntaxException;
 import java.util.Set;
 
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.osgi.TikaService;
 import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.ops4j.pax.exam.Configuration;
@@ -48,7 +38,6 @@ import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
 import org.ops4j.pax.exam.spi.reactors.PerMethod;
 import org.osgi.framework.Bundle;
 import org.osgi.framework.BundleContext;
-import org.xml.sax.ContentHandler;
 
 @RunWith(PaxExam.class)
 @ExamReactorStrategy(PerMethod.class)

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb b/tika-core/src/test/resources/org/apache/tika/mime/GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb
deleted file mode 100644
index 0bffdca..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb and /dev/null differ


[05/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
deleted file mode 100644
index c3d13b7..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.mime;
-
-// Junit imports
-import static java.nio.charset.StandardCharsets.UTF_16BE;
-import static java.nio.charset.StandardCharsets.UTF_16LE;
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNotSame;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * 
- * Test Suite for the {@link MimeTypes} repository.
- * 
- */
-public class TestMimeTypes {
-
-    private Tika tika;
-
-    private MimeTypes repo;
-
-    private URL u;
-
-    private static final File f = new File("/a/b/c/x.pdf");
-
-    @Before
-    public void setUp() throws Exception{
-        TikaConfig config = TikaConfig.getDefaultConfig();
-        repo = config.getMimeRepository();
-        tika = new Tika(config);
-        u = new URL("http://mydomain.com/x.pdf?x=y");
-    }
-
-    @Test
-    public void testCaseSensitivity() {
-        String type = tika.detect("test.PDF");
-        assertNotNull(type);
-        assertEquals(type, tika.detect("test.pdf"));
-        assertEquals(type, tika.detect("test.PdF"));
-        assertEquals(type, tika.detect("test.pdF"));
-    }
-
-    @Test
-    public void testLoadMimeTypes() throws MimeTypeException {
-        assertNotNull(repo.forName("application/octet-stream"));
-        assertNotNull(repo.forName("text/x-tex"));
-    }
-
-    /**
-     * Tests MIME type determination based solely on the URL's extension.
-     */
-    @Test
-    public void testGuessMimeTypes() throws Exception {
-        assertTypeByName("application/pdf", "x.pdf");
-        assertEquals("application/pdf", tika.detect(u.toExternalForm()));
-        assertEquals("application/pdf", tika.detect(f.getPath()));
-        assertTypeByName("text/plain", "x.txt");
-        assertTypeByName("text/html", "x.htm");
-        assertTypeByName("text/html", "x.html");
-        assertTypeByName("application/xhtml+xml", "x.xhtml");
-        assertTypeByName("application/xml", "x.xml");
-        assertTypeByName("application/zip", "x.zip");
-        assertTypeByName("application/vnd.oasis.opendocument.text", "x.odt");
-        assertTypeByName("application/octet-stream", "x.unknown");
-
-        // Test for the MS Office media types and file extensions listed in
-        // http://blogs.msdn.com/vsofficedeveloper/pages/Office-2007-Open-XML-MIME-Types.aspx
-        assertTypeByName("application/msword", "x.doc");
-        assertTypeByName("application/msword", "x.dot");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "x.docx");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.template", "x.dotx");
-        assertTypeByName("application/vnd.ms-word.document.macroenabled.12", "x.docm");
-        assertTypeByName("application/vnd.ms-word.template.macroenabled.12", "x.dotm");
-        assertTypeByName("application/vnd.ms-excel", "x.xls");
-        assertTypeByName("application/vnd.ms-excel", "x.xlt");
-        assertTypeByName("application/vnd.ms-excel", "x.xla");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "x.xlsx");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.template", "x.xltx");
-        assertTypeByName("application/vnd.ms-excel.sheet.macroenabled.12", "x.xlsm");
-        assertTypeByName("application/vnd.ms-excel.template.macroenabled.12", "x.xltm");
-        assertTypeByName("application/vnd.ms-excel.addin.macroenabled.12", "x.xlam");
-        assertTypeByName("application/vnd.ms-excel.sheet.binary.macroenabled.12", "x.xlsb");
-        assertTypeByName("application/vnd.ms-powerpoint", "x.ppt");
-        assertTypeByName("application/vnd.ms-powerpoint", "x.pot");
-        assertTypeByName("application/vnd.ms-powerpoint", "x.pps");
-        assertTypeByName("application/vnd.ms-powerpoint", "x.ppa");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.presentation", "x.pptx");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.template", "x.potx");
-        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.slideshow", "x.ppsx");
-        assertTypeByName("application/vnd.ms-powerpoint.addin.macroenabled.12", "x.ppam");
-        assertTypeByName("application/vnd.ms-powerpoint.presentation.macroenabled.12", "x.pptm");
-        assertTypeByName("application/vnd.ms-powerpoint.template.macroenabled.12", "x.potm");
-        assertTypeByName("application/vnd.ms-powerpoint.slideshow.macroenabled.12", "x.ppsm");
-    }
-
-    /**
-     * Note - detecting container formats by mime magic is very very
-     *  iffy, as we can't be sure where things will end up.
-     * People really ought to use the container aware detection...
-     */
-    @Test
-    public void testOLE2Detection() throws Exception {
-        // These have the properties block near the start, so our mime
-        //  magic will spot them
-        assertTypeByData("application/vnd.ms-excel", "testEXCEL.xls");
-        
-        // This one quite legitimately doesn't have its properties block
-        //  as one of the first couple of entries
-        // As such, our mime magic can't figure it out...
-        assertTypeByData("application/x-tika-msoffice", "testWORD.doc");
-        assertTypeByData("application/x-tika-msoffice", "testPPT.ppt");
-        
-        
-        // By name + data:
-        
-        // Those we got right to start with are fine
-        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL.xls");
-        
-        // And the name lets us specialise the generic OOXML
-        //  ones to their actual type
-        assertTypeByNameAndData("application/vnd.ms-powerpoint", "testPPT.ppt");
-        assertTypeByNameAndData("application/msword", "testWORD.doc");
-    }
-    
-    /**
-     * Files generated by Works 7.0 Spreadsheet application use the OLE2
-     * structure and resemble Excel files (they contain a "Workbook"). They are
-     * not Excel though. They are distinguished from Excel files with an
-     * additional top-level entry in below the root of the POI filesystem.
-     * 
-     * @throws Exception
-     */
-    @Test
-    public void testWorksSpreadsheetDetection() throws Exception {
-        assertTypeDetection("testWORKSSpreadsheet7.0.xlr",
-                // with name-only, everything should be all right 
-                "application/x-tika-msworks-spreadsheet",
-                // this is possible due to MimeTypes guessing the type
-                // based on the WksSSWorkBook near the beginning of the
-                // file
-                "application/x-tika-msworks-spreadsheet",
-                // this is right, the magic-based detection works, there is
-                // no need for the name-based detection to refine it
-                "application/x-tika-msworks-spreadsheet");
-    }
-    
-    @Test
-    public void testStarOfficeDetection() throws Exception {
-        assertTypeDetection("testVORCalcTemplate.vor",
-                "application/x-staroffice-template",
-                "application/vnd.stardivision.calc",
-                "application/vnd.stardivision.calc");
-        assertTypeDetection("testVORDrawTemplate.vor",
-                "application/x-staroffice-template",
-                "application/vnd.stardivision.draw",
-                "application/vnd.stardivision.draw");
-        assertTypeDetection("testVORImpressTemplate.vor",
-                "application/x-staroffice-template",
-                "application/vnd.stardivision.impress",
-                "application/vnd.stardivision.impress");
-        assertTypeDetection("testVORWriterTemplate.vor",
-                "application/x-staroffice-template",
-                "application/vnd.stardivision.writer",
-                "application/vnd.stardivision.writer");
-        
-        assertTypeDetection("testStarOffice-5.2-calc.sdc",
-                "application/vnd.stardivision.calc",
-                "application/vnd.stardivision.calc",
-                "application/vnd.stardivision.calc");
-        assertTypeDetection("testStarOffice-5.2-draw.sda",
-                "application/vnd.stardivision.draw",
-                "application/vnd.stardivision.draw",
-                "application/vnd.stardivision.draw");
-        assertTypeDetection("testStarOffice-5.2-impress.sdd",
-                "application/vnd.stardivision.impress",
-                "application/vnd.stardivision.impress",
-                "application/vnd.stardivision.impress");
-        assertTypeDetection("testStarOffice-5.2-writer.sdw",
-                "application/vnd.stardivision.writer",
-                "application/vnd.stardivision.writer",
-                "application/vnd.stardivision.writer");
-    }
-    
-    /**
-     * Files generated by Works Word Processor versions 3.0 and 4.0 use the
-     * OLE2 structure. They don't resemble Word though.
-     * 
-     * @throws Exception
-     */
-    @Test
-    public void testOldWorksWordProcessorDetection() throws Exception {
-        assertTypeDetection(
-                "testWORKSWordProcessor3.0.wps",
-                // .wps is just like any other works extension
-                "application/vnd.ms-works",
-                // this is due to MatOST substring
-                "application/vnd.ms-works",
-                // magic-based detection works, no need to refine it
-                "application/vnd.ms-works");
-        
-        // files in version 4.0 are no different from those in version 3.0
-        assertTypeDetection(
-                "testWORKSWordProcessor4.0.wps",
-                "application/vnd.ms-works",
-                "application/vnd.ms-works",
-                "application/vnd.ms-works");
-    }
-    
-    /**
-     * Files from Excel 2 through 4 are based on the BIFF record
-     *  structure, but without a wrapping OLE2 structure.
-     * Excel 5 and Excel 95+ work on OLE2
-     */
-    @Test
-    public void testOldExcel() throws Exception {
-        // With just a name, we'll think everything's a new Excel file
-        assertTypeByName("application/vnd.ms-excel","testEXCEL_4.xls");
-        assertTypeByName("application/vnd.ms-excel","testEXCEL_5.xls");
-        assertTypeByName("application/vnd.ms-excel","testEXCEL_95.xls");
-        
-        // With data, we can work out if it's old or new style
-        assertTypeByData("application/vnd.ms-excel.sheet.4","testEXCEL_4.xls");
-        assertTypeByData("application/x-tika-msoffice","testEXCEL_5.xls");
-        assertTypeByData("application/x-tika-msoffice","testEXCEL_95.xls");
-        
-        assertTypeByNameAndData("application/vnd.ms-excel.sheet.4","testEXCEL_4.xls");
-        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL_5.xls");
-        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL_95.xls");
-    }
-    
-    /**
-     * Note - detecting container formats by mime magic is very very
-     *  iffy, as we can't be sure where things will end up.
-     * People really ought to use the container aware detection...
-     */
-    @Test
-    public void testOoxmlDetection() throws Exception {
-        // These two do luckily have [Content_Types].xml near the start,
-        //  so our mime magic will spot them
-        assertTypeByData("application/x-tika-ooxml", "testEXCEL.xlsx");
-        assertTypeByData("application/x-tika-ooxml", "testPPT.pptx");
-        
-        // This one quite legitimately doesn't have its [Content_Types].xml
-        //  file as one of the first couple of entries
-        // As such, our mime magic can't figure it out...
-        assertTypeByData("application/zip", "testWORD.docx");
-        
-        // If we give the filename as well as the data, we can
-        //  specialise the ooxml generic one to the correct type
-        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "testEXCEL.xlsx");
-        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.presentationml.presentation", "testPPT.pptx");
-        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "testWORD.docx");
-        
-        // Test a few of the less usual ones
-        assertTypeByNameAndData("application/vnd.ms-excel.sheet.binary.macroenabled.12","testEXCEL.xlsb");
-        assertTypeByNameAndData("application/vnd.ms-powerpoint.presentation.macroenabled.12", "testPPT.pptm");
-        assertTypeByNameAndData("application/vnd.ms-powerpoint.template.macroenabled.12", "testPPT.potm");
-        assertTypeByNameAndData("application/vnd.ms-powerpoint.slideshow.macroenabled.12", "testPPT.ppsm");
-    }
-    
-    /**
-     * Note - container based formats, needs container detection
-     *  to be properly correct
-     */
-    @Test
-    public void testVisioDetection() throws Exception {
-        // By Name, should get it right
-        assertTypeByName("application/vnd.visio", "testVISIO.vsd");
-        assertTypeByName("application/vnd.ms-visio.drawing.macroenabled.12", "testVISIO.vsdm");
-        assertTypeByName("application/vnd.ms-visio.drawing", "testVISIO.vsdx");
-        assertTypeByName("application/vnd.ms-visio.stencil.macroenabled.12", "testVISIO.vssm");
-        assertTypeByName("application/vnd.ms-visio.stencil", "testVISIO.vssx");
-        assertTypeByName("application/vnd.ms-visio.template.macroenabled.12", "testVISIO.vstm");
-        assertTypeByName("application/vnd.ms-visio.template", "testVISIO.vstx");
-        
-        // By Name and Data, should get it right
-        assertTypeByNameAndData("application/vnd.visio", "testVISIO.vsd");
-        assertTypeByNameAndData("application/vnd.ms-visio.drawing.macroenabled.12", "testVISIO.vsdm");
-        assertTypeByNameAndData("application/vnd.ms-visio.drawing", "testVISIO.vsdx");
-        assertTypeByNameAndData("application/vnd.ms-visio.stencil.macroenabled.12", "testVISIO.vssm");
-        assertTypeByNameAndData("application/vnd.ms-visio.stencil", "testVISIO.vssx");
-        assertTypeByNameAndData("application/vnd.ms-visio.template.macroenabled.12", "testVISIO.vstm");
-        assertTypeByNameAndData("application/vnd.ms-visio.template", "testVISIO.vstx");
-        
-        // By Data only, will get the container parent
-        assertTypeByData("application/x-tika-msoffice", "testVISIO.vsd");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vsdm");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vsdx");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vssm");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vssx");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vstm");
-        assertTypeByData("application/x-tika-ooxml", "testVISIO.vstx");
-    }
-
-    /**
-     * Note - detecting container formats by mime magic is very very
-     *  iffy, as we can't be sure where things will end up.
-     * People really ought to use the container aware detection...
-     */
-    @Test
-    public void testIWorkDetection() throws Exception {
-        // By name is easy
-       assertTypeByName("application/vnd.apple.keynote", "testKeynote.key");
-       assertTypeByName("application/vnd.apple.numbers", "testNumbers.numbers");
-       assertTypeByName("application/vnd.apple.pages", "testPages.pages");
-       
-       // We can't do it by data, as we'd need to unpack
-       //  the zip file to check the XML 
-       assertTypeByData("application/zip", "testKeynote.key");
-       
-       assertTypeByNameAndData("application/vnd.apple.keynote", "testKeynote.key");
-       assertTypeByNameAndData("application/vnd.apple.numbers", "testNumbers.numbers");
-       assertTypeByNameAndData("application/vnd.apple.pages", "testPages.pages");
-    }
-    
-    @Test
-    public void testArchiveDetection() throws Exception {
-       assertTypeByName("application/x-archive", "test.ar");
-       assertTypeByName("application/zip",    "test.zip");
-       assertTypeByName("application/x-tar",  "test.tar");
-       assertTypeByName("application/gzip", "test.tgz"); // See GZIP, not tar contents of it
-       assertTypeByName("application/x-cpio", "test.cpio");
-       
-       // TODO Add an example .deb and .udeb, then check these
-       
-       // Check the mime magic patterns for them work too
-       assertTypeByData("application/x-archive", "testARofText.ar");
-       assertTypeByData("application/x-archive", "testARofSND.ar"); 
-       assertTypeByData("application/zip",    "test-documents.zip");
-       assertTypeByData("application/x-gtar",  "test-documents.tar"); // GNU TAR
-       assertTypeByData("application/gzip", "test-documents.tgz"); // See GZIP, not tar contents of it
-       assertTypeByData("application/x-cpio", "test-documents.cpio");
-       
-       // For spanned zip files, the .zip file doesn't have the header, it's the other parts
-       assertTypeByData("application/octet-stream", "test-documents-spanned.zip");
-       assertTypeByData("application/zip",          "test-documents-spanned.z01");
-    }
-    
-    @Test
-    public void testFeedsDetection() throws Exception {
-        assertType("application/rss+xml",  "rsstest.rss");
-        assertType("application/atom+xml", "testATOM.atom");
-        assertTypeByData("application/rss+xml",  "rsstest.rss");
-        assertTypeByName("application/rss+xml",  "rsstest.rss");
-        assertTypeByData("application/atom+xml", "testATOM.atom");
-        assertTypeByName("application/atom+xml", "testATOM.atom");
-    }
-    
-    @Test
-    public void testFitsDetection() throws Exception {
-        // FITS image created using imagemagick convert of testJPEG.jpg
-        assertType("application/fits", "testFITS.fits");
-        assertTypeByData("application/fits", "testFITS.fits");
-        assertTypeByName("application/fits", "testFITS.fits");
-    }
-
-    @Test
-    public void testJpegDetection() throws Exception {
-        assertType("image/jpeg", "testJPEG.jpg");
-        assertTypeByData("image/jpeg", "testJPEG.jpg");
-        assertTypeByName("image/jpeg", "x.jpg");
-        assertTypeByName("image/jpeg", "x.JPG");
-        assertTypeByName("image/jpeg", "x.jpeg");
-        assertTypeByName("image/jpeg", "x.JPEG");
-        assertTypeByName("image/jpeg", "x.jpe");
-        assertTypeByName("image/jpeg", "x.jif");
-        assertTypeByName("image/jpeg", "x.jfif");
-        assertTypeByName("image/jpeg", "x.jfi");
-        
-        assertType("image/jp2", "testJPEG.jp2");
-        assertTypeByData("image/jp2", "testJPEG.jp2");
-        assertTypeByName("image/jp2", "x.jp2");
-    }
-
-    @Test
-    public void testBpgDetection() throws Exception {
-        assertType("image/x-bpg", "testBPG.bpg");
-        assertTypeByData("image/x-bpg", "testBPG.bpg");
-        assertTypeByData("image/x-bpg", "testBPG_commented.bpg");
-        assertTypeByName("image/x-bpg", "x.bpg");
-    }
-    
-    @Test
-    public void testTiffDetection() throws Exception {
-        assertType("image/tiff", "testTIFF.tif");
-        assertTypeByData("image/tiff", "testTIFF.tif");
-        assertTypeByName("image/tiff", "x.tiff");
-        assertTypeByName("image/tiff", "x.tif");
-        assertTypeByName("image/tiff", "x.TIF");
-    }
-
-    @Test
-    public void testGifDetection() throws Exception {
-        assertType("image/gif", "testGIF.gif");
-        assertTypeByData("image/gif", "testGIF.gif");
-        assertTypeByName("image/gif", "x.gif");
-        assertTypeByName("image/gif", "x.GIF");
-    }
-
-    @Test
-    public void testPngDetection() throws Exception {
-        assertType("image/png", "testPNG.png");
-        assertTypeByData("image/png", "testPNG.png");
-        assertTypeByName("image/png", "x.png");
-        assertTypeByName("image/png", "x.PNG");
-    }
-
-    @Test
-    public void testWEBPDetection() throws Exception {
-        assertType("image/webp", "testWEBP.webp");
-        assertTypeByData("image/webp", "testWEBP.webp");
-        assertTypeByName("image/webp", "x.webp");
-        assertTypeByName("image/webp", "x.WEBP");
-    }
-
-    @Test
-    public void testBmpDetection() throws Exception {
-        assertType("image/x-ms-bmp", "testBMP.bmp");
-        assertTypeByData("image/x-ms-bmp", "testBMP.bmp");
-        assertTypeByName("image/x-ms-bmp", "x.bmp");
-        assertTypeByName("image/x-ms-bmp", "x.BMP");
-        assertTypeByName("image/x-ms-bmp", "x.dib");
-        assertTypeByName("image/x-ms-bmp", "x.DIB");
-        //false positive check -- contains part of BMP signature
-        assertType("text/plain", "testBMPfp.txt");
-    }
-
-    @Test
-    public void testPnmDetection() throws Exception {
-        assertType("image/x-portable-bitmap", "testPBM.pbm");
-        assertType("image/x-portable-graymap", "testPGM.pgm");
-        assertType("image/x-portable-pixmap", "testPPM.ppm");
-        assertTypeByData("image/x-portable-bitmap", "testPBM.pbm");
-        assertTypeByData("image/x-portable-graymap", "testPGM.pgm");
-        assertTypeByData("image/x-portable-pixmap", "testPPM.ppm");
-        assertTypeByName("image/x-portable-anymap", "x.pnm");
-        assertTypeByName("image/x-portable-anymap", "x.PNM");
-        assertTypeByName("image/x-portable-bitmap", "x.pbm");
-        assertTypeByName("image/x-portable-bitmap", "x.PBM");
-        assertTypeByName("image/x-portable-graymap", "x.pgm");
-        assertTypeByName("image/x-portable-graymap", "x.PGM");
-        assertTypeByName("image/x-portable-pixmap", "x.ppm");
-        assertTypeByName("image/x-portable-pixmap", "x.PPM");
-    }
-
-    @Test
-    public void testPictDetection() throws Exception {
-        assertType("image/x-pict", "testPICT.pct");
-        assertTypeByData("image/x-pict", "testPICT.pct");
-        assertTypeByName("image/x-pict", "x.pic");
-        assertTypeByName("image/x-pict", "x.PCT");
-    }
-
-    @Test
-    public void testCgmDetection() throws Exception {
-        // TODO: Need a test image file
-        assertTypeByName("image/cgm", "x.cgm");
-        assertTypeByName("image/cgm", "x.CGM");
-    }
-
-    @Test
-    public void testRdfXmlDetection() throws Exception {
-        assertTypeByName("application/rdf+xml", "x.rdf");
-        assertTypeByName("application/rdf+xml", "x.owl");
-    }
-
-    @Test
-    public void testSvgDetection() throws Exception {
-        assertType("image/svg+xml", "testSVG.svg");
-        assertTypeByData("image/svg+xml", "testSVG.svg");
-        assertTypeByName("image/svg+xml", "x.svg");
-        assertTypeByName("image/svg+xml", "x.SVG");
-
-        // Should *.svgz be svg or gzip
-        assertType("application/gzip", "testSVG.svgz");
-        assertTypeByData("application/gzip", "testSVG.svgz");
-        assertTypeByName("image/svg+xml", "x.svgz");
-        assertTypeByName("image/svg+xml", "x.SVGZ");
-    }
-
-    @Test
-    public void testPdfDetection() throws Exception {
-        // PDF extension by name is enough
-        assertTypeByName("application/pdf", "x.pdf");
-        assertTypeByName("application/pdf", "x.PDF");
-
-        // For normal PDFs, can get by name or data or both
-        assertType("application/pdf", "testPDF.pdf");
-        assertTypeByData("application/pdf", "testPDF.pdf");
-
-        // PDF with a BoM works both ways too
-        assertType("application/pdf", "testPDF_bom.pdf");
-        assertTypeByData("application/pdf", "testPDF_bom.pdf");
-    }
-
-    @Test
-    public void testSwfDetection() throws Exception {
-        assertTypeByName("application/x-shockwave-flash", "x.swf");
-        assertTypeByName("application/x-shockwave-flash", "x.SWF");
-        assertTypeByName("application/x-shockwave-flash", "test1.swf");
-        assertTypeByName("application/x-shockwave-flash", "test2.swf");
-        assertTypeByName("application/x-shockwave-flash", "test3.swf");
-    }
-
-    @Test
-    public void testDwgDetection() throws Exception {
-        assertTypeByName("image/vnd.dwg", "x.dwg");
-        assertTypeByData("image/vnd.dwg", "testDWG2004.dwg");
-        assertTypeByData("image/vnd.dwg", "testDWG2007.dwg");
-        assertTypeByData("image/vnd.dwg", "testDWG2010.dwg");
-    }
-
-    @Test
-    public void testprtDetection() throws Exception {
-       assertTypeByName("application/x-prt", "x.prt");
-       assertTypeByData("application/x-prt", "testCADKEY.prt");
-   }
-    
-    /**
-     * Formats which are based on plain text
-     */
-    @Test
-    public void testTextBasedFormatsDetection() throws Exception {
-       assertTypeByName("text/plain", "testTXT.txt");
-       assertType(      "text/plain", "testTXT.txt");
-       
-       assertTypeByName("text/css", "testCSS.css");
-       assertType(      "text/css", "testCSS.css");
-       
-       assertTypeByName("text/csv", "testCSV.csv");
-       assertType(      "text/csv", "testCSV.csv");
-       
-       assertTypeByName("text/html", "testHTML.html");
-       assertType(      "text/html", "testHTML.html");
-       
-       assertTypeByName("application/javascript", "testJS.js");
-       assertType(      "application/javascript", "testJS.js");
-    }
-    
-    @Test
-    public void testJavaDetection() throws Exception {
-        // TODO Classloader doesn't seem to find the .class file in test-documents
-        //assertTypeDetection("AutoDetectParser.class", "application/java-vm");
-        
-        // OSX Native Extension
-        assertTypeDetection("testJNILIB.jnilib", "application/x-java-jnilib");
-    }
-
-    @Test
-    public void testXmlAndHtmlDetection() throws Exception {
-        assertTypeByData("application/xml", "<?xml version=\"1.0\" encoding=\"UTF-8\"?><records><record/></records>"
-                .getBytes(UTF_8));
-        assertTypeByData("application/xml", "\uFEFF<?xml version=\"1.0\" encoding=\"UTF-16\"?><records><record/></records>"
-                .getBytes(UTF_16LE));
-        assertTypeByData("application/xml", "\uFEFF<?xml version=\"1.0\" encoding=\"UTF-16\"?><records><record/></records>"
-                .getBytes(UTF_16BE));
-        assertTypeByData("application/xml", "<!-- XML without processing instructions --><records><record/></records>"
-                .getBytes(UTF_8));
-        assertTypeByData("text/html", "<html><body>HTML</body></html>"
-                .getBytes(UTF_8));
-        assertTypeByData("text/html", "<!-- HTML comment --><html><body>HTML</body></html>"
-                .getBytes(UTF_8));
-    }
-
-    @Test
-    public void testWmfDetection() throws Exception {
-        assertTypeByName("application/x-msmetafile", "x.wmf");
-        assertTypeByData("application/x-msmetafile", "testWMF.wmf");
-        assertTypeByName("application/x-msmetafile", "x.WMF");
-
-        assertTypeByName("application/x-emf", "x.emf");
-        assertTypeByData("application/x-emf","testEMF.emf");
-        assertTypeByName("application/x-emf", "x.EMF");
-        // TODO: Need a test wmz file
-        assertTypeByName("application/x-ms-wmz", "x.wmz");
-        assertTypeByName("application/x-ms-wmz", "x.WMZ");
-        // TODO: Need a test emz file
-        assertTypeByName("application/gzip", "x.emz");
-        assertTypeByName("application/gzip", "x.EMZ");
-    }
-
-    @Test
-    public void testPsDetection() throws Exception {
-        // TODO: Need a test postscript file
-        assertTypeByName("application/postscript", "x.ps");
-        assertTypeByName("application/postscript", "x.PS");
-        assertTypeByName("application/postscript", "x.eps");
-        assertTypeByName("application/postscript", "x.epsf");
-        assertTypeByName("application/postscript", "x.epsi");
-    }
-    
-    @Test
-    public void testMicrosoftMultiMediaDetection() throws Exception {
-       assertTypeByName("video/x-ms-asf", "x.asf");
-       assertTypeByName("video/x-ms-wmv", "x.wmv");
-       assertTypeByName("audio/x-ms-wma", "x.wma");
-       
-       assertTypeByData("video/x-ms-asf", "testASF.asf");
-       assertTypeByData("video/x-ms-wmv", "testWMV.wmv");
-       assertTypeByData("audio/x-ms-wma", "testWMA.wma");
-    }
-    
-    /**
-     * All 3 DITA types are in theory handled by the same mimetype,
-     *  but we specialise them 
-     */
-    @Test
-    public void testDITADetection() throws Exception {
-       assertTypeByName("application/dita+xml; format=topic", "test.dita");
-       assertTypeByName("application/dita+xml; format=map", "test.ditamap");
-       assertTypeByName("application/dita+xml; format=val", "test.ditaval");
-       
-       assertTypeByData("application/dita+xml; format=task", "testDITA.dita");
-       assertTypeByData("application/dita+xml; format=concept", "testDITA2.dita");
-       assertTypeByData("application/dita+xml; format=map", "testDITA.ditamap");
-       
-       assertTypeByNameAndData("application/dita+xml; format=task", "testDITA.dita");
-       assertTypeByNameAndData("application/dita+xml; format=concept", "testDITA2.dita");
-       assertTypeByNameAndData("application/dita+xml; format=map", "testDITA.ditamap");
-       
-       // These are all children of the official type
-       assertEquals("application/dita+xml", 
-             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.ditamap")).toString());
-       assertEquals("application/dita+xml", 
-             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.dita")).toString());
-       // Concept inherits from topic
-       assertEquals("application/dita+xml; format=topic", 
-             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA2.dita")).toString());
-    }
-
-    /**
-     * @since TIKA-194
-     */
-    @Test
-    public void testJavaRegex() throws Exception{
-        MimeType testType = new MimeType(MediaType.parse("foo/bar"));
-        this.repo.add(testType);
-        assertNotNull(repo.forName("foo/bar"));
-        String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
-        this.repo.addPattern(testType, pattern, true);
-        String testFileName = "rtg_sst_grb_0.5.12345678";
-        assertEquals("foo/bar", tika.detect(testFileName));
-
-        MimeType testType2 = new MimeType(MediaType.parse("foo/bar2"));
-        this.repo.add(testType2);
-        assertNotNull(repo.forName("foo/bar2"));
-        this.repo.addPattern(testType2, pattern, false);
-        assertNotSame("foo/bar2", tika.detect(testFileName));
-    }
-    
-    @Test
-    public void testRawDetection() throws Exception {
-        assertTypeByName("image/x-raw-adobe", "x.dng");
-        assertTypeByName("image/x-raw-adobe", "x.DNG");
-        assertTypeByName("image/x-raw-hasselblad", "x.3fr");
-        assertTypeByName("image/x-raw-fuji", "x.raf");
-        assertTypeByName("image/x-raw-canon", "x.crw");
-        assertTypeByName("image/x-raw-canon", "x.cr2");
-        assertTypeByName("image/x-raw-kodak", "x.k25");
-        assertTypeByName("image/x-raw-kodak", "x.kdc");
-        assertTypeByName("image/x-raw-kodak", "x.dcs");
-        assertTypeByName("image/x-raw-kodak", "x.drf");
-        assertTypeByName("image/x-raw-minolta", "x.mrw");
-        assertTypeByName("image/x-raw-nikon", "x.nef");
-        assertTypeByName("image/x-raw-nikon", "x.nrw");
-        assertTypeByName("image/x-raw-olympus", "x.orf");
-        assertTypeByName("image/x-raw-pentax", "x.ptx");
-        assertTypeByName("image/x-raw-pentax", "x.pef");
-        assertTypeByName("image/x-raw-sony", "x.arw");
-        assertTypeByName("image/x-raw-sony", "x.srf");
-        assertTypeByName("image/x-raw-sony", "x.sr2");
-        assertTypeByName("image/x-raw-sigma", "x.x3f");
-        assertTypeByName("image/x-raw-epson", "x.erf");
-        assertTypeByName("image/x-raw-mamiya", "x.mef");
-        assertTypeByName("image/x-raw-leaf", "x.mos");
-        assertTypeByName("image/x-raw-panasonic", "x.raw");
-        assertTypeByName("image/x-raw-panasonic", "x.rw2");
-        assertTypeByName("image/x-raw-phaseone", "x.iiq");
-        assertTypeByName("image/x-raw-red", "x.r3d");
-        assertTypeByName("image/x-raw-imacon", "x.fff");
-        assertTypeByName("image/x-raw-logitech", "x.pxn");
-        assertTypeByName("image/x-raw-casio", "x.bay");
-        assertTypeByName("image/x-raw-rawzor", "x.rwz");
-    }
-    
-    /**
-     * Tests that we correctly detect the font types
-     */
-    @Test
-    public void testFontDetection() throws Exception {
-       assertTypeByName("application/x-font-adobe-metric", "x.afm");
-       assertTypeByData("application/x-font-adobe-metric", "testAFM.afm");
-       
-       assertTypeByName("application/x-font-printer-metric", "x.pfm");
-       // TODO Get a sample .pfm file
-       assertTypeByData(
-             "application/x-font-printer-metric", 
-             new byte[] {0x00, 0x01, 256-0xb1, 0x0a, 0x00, 0x00, 0x43, 0x6f,  
-                         0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20}
-       );
-       
-       assertTypeByName("application/x-font-type1", "x.pfa");
-       // TODO Get a sample .pfa file
-       assertTypeByData(
-             "application/x-font-type1", 
-             new byte[] {0x25, 0x21, 0x50, 0x53, 0x2d, 0x41, 0x64, 0x6f,
-                         0x62, 0x65, 0x46, 0x6f, 0x6e, 0x74, 0x2d, 0x31,
-                         0x2e, 0x30, 0x20, 0x20, 0x2d, 0x2a, 0x2d, 0x20}
-       );
-       
-       assertTypeByName("application/x-font-type1", "x.pfb");
-       // TODO Get a sample .pfm file
-       assertTypeByData(
-             "application/x-font-type1", 
-             new byte[] {-0x80, 0x01, 0x09, 0x05, 0x00, 0x00, 0x25, 0x21,
-                          0x50, 0x53, 0x2d, 0x41, 0x64, 0x6f, 0x62, 0x65,
-                          0x46, 0x6f, 0x6e, 0x74, 0x2d, 0x31, 0x2e, 0x30 }
-       );
-    }
-
-    /**
-     * Tests MimeTypes.getMimeType(URL), which examines both the byte header
-     * and, if necessary, the URL's extension.
-     */
-    @Test
-    public void testMimeDeterminationForTestDocuments() throws Exception {
-        assertType("text/html", "testHTML.html");
-        assertType("application/zip", "test-documents.zip");
-
-        assertType("text/html", "testHTML_utf8.html");
-        assertType(
-                "application/vnd.oasis.opendocument.text",
-                "testOpenOffice2.odt");
-        assertType("application/pdf", "testPDF.pdf");
-        assertType("application/rtf", "testRTF.rtf");
-        assertType("text/plain", "testTXT.txt");
-        assertType("application/xml", "testXML.xml");
-        assertType("audio/basic", "testAU.au");
-        assertType("audio/x-aiff", "testAIFF.aif");
-        assertType("audio/x-wav", "testWAV.wav");
-        assertType("audio/midi", "testMID.mid");
-        assertType("application/x-msaccess", "testACCESS.mdb");
-        assertType("application/x-font-ttf", "testTrueType3.ttf");
-    }
-    
-    @Test
-    public void test7ZipDetection() throws Exception {
-       assertTypeByName("application/x-7z-compressed","test-documents.7z");
-       assertTypeByData("application/x-7z-compressed","test-documents.7z");
-       assertTypeByNameAndData("application/x-7z-compressed", "test-documents.7z");
-   }
-
-    @Test
-    public void testWebArchiveDetection() throws Exception {
-        assertTypeByName("application/x-webarchive","x.webarchive");
-        assertTypeByData("application/x-bplist","testWEBARCHIVE.webarchive");
-        assertTypeByNameAndData("application/x-webarchive", "testWEBARCHIVE.webarchive");
-    }
-
-    /**
-     * KML, and KMZ (zipped KML)
-     */
-    @Test
-    public void testKMLZDetection() throws Exception {
-       assertTypeByName("application/vnd.google-earth.kml+xml","testKML.kml");
-       assertTypeByData("application/vnd.google-earth.kml+xml","testKML.kml");
-       assertTypeByNameAndData("application/vnd.google-earth.kml+xml", "testKML.kml");
-       
-       assertTypeByName("application/vnd.google-earth.kmz","testKMZ.kmz");
-       assertTypeByNameAndData("application/vnd.google-earth.kmz", "testKMZ.kmz");
-       
-       // By data only, mimetype magic only gets us to a .zip
-       // We need to use the Zip Aware detector to get the full type
-       assertTypeByData("application/zip","testKMZ.kmz");
-   }
-
-    @Test
-    public void testCreativeSuite() throws IOException {
-        assertTypeDetection("testINDD.indd", "application/x-adobe-indesign");
-        assertTypeDetection("testPSD.psd", "image/vnd.adobe.photoshop");
-    }
-    
-    @Test
-    public void testAMR() throws IOException {
-        // AMR matches on name, data or both
-        assertTypeDetection("testAMR.amr", "audio/amr");
-        
-        // AMR-WB subtype shares extension, so needs data to identify
-        assertTypeDetection("testAMR-WB.amr", "audio/amr", "audio/amr-wb", "audio/amr-wb");
-        
-        // Ditto for the AMR-WB+ subtype, which we don't have a sample file of yet
-        //assertTypeDetection("testAMR-WB+.amr", "audio/amr", "audio/amr-wb+", "audio/amr-wb+");
-    }
-    
-    @Test
-    public void testEmail() throws IOException {
-        // EMLX
-        assertTypeDetection("testEMLX.emlx", "message/x-emlx");
-        
-        // Groupwise
-        assertTypeDetection("testGroupWiseEml.eml", "message/rfc822");
-        
-        // Lotus
-        assertTypeDetection("testLotusEml.eml", "message/rfc822");
-        
-        // Thunderbird - doesn't currently work by name
-        assertTypeByNameAndData("message/rfc822", "testThunderbirdEml.eml");
-    }
-    
-    @Test
-    public void testAxCrypt() throws Exception {
-        // test-TXT.txt encrypted with a key of "tika"
-        assertTypeDetection("testTXT-tika.axx", "application/x-axcrypt");
-    }
-    
-    @Test
-    public void testWindowsEXE() throws Exception {
-        assertTypeByName("application/x-msdownload", "x.dll");
-        assertTypeByName("application/x-ms-installer", "x.msi");
-        assertTypeByName("application/x-dosexec", "x.exe");
-        
-        assertTypeByData("application/x-msdownload; format=pe", "testTinyPE.exe");
-        assertTypeByNameAndData("application/x-msdownload; format=pe", "testTinyPE.exe");
-        
-        // A jar file with part of a PE header, but not a full one
-        //  should still be detected as a zip or jar (without/with name)
-        assertTypeByData("application/zip", "testJAR_with_PEHDR.jar");
-        assertTypeByNameAndData("application/java-archive", "testJAR_with_PEHDR.jar");
-    }
-    
-    @Test
-    public void testMatroskaDetection() throws Exception {
-        assertType("video/x-matroska", "testMKV.mkv");
-        // TODO: Need custom detector data detection, see TIKA-1180
-        assertTypeByData("application/x-matroska", "testMKV.mkv");
-        assertTypeByNameAndData("video/x-matroska", "testMKV.mkv");
-        assertTypeByName("video/x-matroska", "x.mkv");
-        assertTypeByName("video/x-matroska", "x.MKV");
-        assertTypeByName("audio/x-matroska", "x.mka");
-        assertTypeByName("audio/x-matroska", "x.MKA");
-    }
-    
-    @Test
-    public void testWebMDetection() throws Exception {
-        assertType("video/webm", "testWEBM.webm");
-        // TODO: Need custom detector data detection, see TIKA-1180
-        assertTypeByData("application/x-matroska", "testWEBM.webm");
-        assertTypeByNameAndData("video/webm", "testWEBM.webm");
-        assertTypeByName("video/webm", "x.webm");
-        assertTypeByName("video/webm", "x.WEBM");
-    }
-
-    /** Test getMimeType(byte[]) */
-    @Test
-    public void testGetMimeType_byteArray() throws IOException {
-        // Plain text detection
-        assertText(new byte[] { (byte) 0xFF, (byte) 0xFE });
-        assertText(new byte[] { (byte) 0xFF, (byte) 0xFE });
-        assertText(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });
-        assertText(new byte[] { 'a', 'b', 'c' });
-        assertText(new byte[] { '\t', '\r', '\n', 0x0C, 0x1B });
-        assertNotText(new byte[] { '\t', '\r', '\n', 0x0E, 0x1C });
-    }
-    
-    @Test
-    public void testBerkeleyDB() throws IOException {
-        assertTypeByData(
-                "application/x-berkeley-db; format=btree; version=2", 
-                "testBDB_btree_2.db");
-        assertTypeByData(
-                "application/x-berkeley-db; format=btree; version=3", 
-                "testBDB_btree_3.db");
-        assertTypeByData(
-                "application/x-berkeley-db; format=btree; version=4", 
-                "testBDB_btree_4.db");
-        // V4 and V5 share the same btree format
-        assertTypeByData(
-                "application/x-berkeley-db; format=btree; version=4", 
-                "testBDB_btree_5.db");
-        
-        assertTypeByData(
-                "application/x-berkeley-db; format=hash; version=2", 
-                "testBDB_hash_2.db");
-        assertTypeByData(
-                "application/x-berkeley-db; format=hash; version=3", 
-                "testBDB_hash_3.db");
-        assertTypeByData(
-                "application/x-berkeley-db; format=hash; version=4", 
-                "testBDB_hash_4.db");
-        assertTypeByData(
-                "application/x-berkeley-db; format=hash; version=5", 
-                "testBDB_hash_5.db");
-    }
-    
-    /**
-     * CBOR typically contains HTML
-     */
-    @Test
-    public void testCBOR() throws IOException {
-        assertTypeByNameAndData("application/cbor", "NUTCH-1997.cbor");
-        assertTypeByData("application/cbor", "NUTCH-1997.cbor");
-    }
-    
-    @Test
-    public void testZLIB() throws IOException {
-        // ZLIB encoded versions of testTXT.txt
-        assertTypeByData("application/zlib", "testTXT.zlib");
-        assertTypeByData("application/zlib", "testTXT.zlib0");
-        assertTypeByData("application/zlib", "testTXT.zlib5");
-        assertTypeByData("application/zlib", "testTXT.zlib9");
-    }
-    
-    @Test
-    public void testTextFormats() throws Exception {
-        assertType("application/x-bibtex-text-file", "testBIBTEX.bib");
-        assertTypeByData("application/x-bibtex-text-file", "testBIBTEX.bib");
-    }
-    
-    @Test
-    public void testCodeFormats() throws Exception {
-        assertType("text/x-csrc", "testC.c");
-        assertType("text/x-chdr", "testH.h");
-        assertTypeByData("text/x-csrc", "testC.c");
-        assertTypeByData("text/x-chdr", "testH.h");
-        
-        assertTypeByName("text/x-java-source", "testJAVA.java");
-        assertType("text/x-java-properties", "testJAVAPROPS.properties");
-        
-        assertType("text/x-matlab", "testMATLAB.m");
-        assertType("text/x-matlab", "testMATLAB_wtsgaus.m");
-        assertType("text/x-matlab", "testMATLAB_barcast.m");
-        assertTypeByData("text/x-matlab", "testMATLAB.m");
-        assertTypeByData("text/x-matlab", "testMATLAB_wtsgaus.m");
-        assertTypeByData("text/x-matlab", "testMATLAB_barcast.m");
-    }
-
-    @Test
-    public void testWebVTT() throws Exception {
-        assertType("text/vtt", "testWebVTT.vtt");
-        assertTypeByData("text/vtt", "testWebVTT.vtt");
-    }
-    
-    private void assertText(byte[] prefix) throws IOException {
-        assertMagic("text/plain", prefix);
-    }
-
-    private void assertNotText(byte[] prefix) throws IOException {
-        assertMagic("application/octet-stream", prefix);
-    }
-
-    private void assertMagic(String expected, byte[] prefix) throws IOException {
-        MediaType type =
-                repo.detect(new ByteArrayInputStream(prefix), new Metadata());
-        assertNotNull(type);
-        assertEquals(expected, type.toString());
-    }
-
-    private void assertType(String expected, String filename) throws Exception {
-        try (InputStream stream = TestMimeTypes.class.getResourceAsStream(
-                "/test-documents/" + filename)) {
-            assertNotNull("Test file not found: " + filename, stream);
-            Metadata metadata = new Metadata();
-            metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
-            assertEquals(expected, repo.detect(stream, metadata).toString());
-        }
-    }
-
-    private void assertTypeByName(String expected, String filename)
-            throws IOException {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
-        assertEquals(expected, repo.detect(null, metadata).toString());
-    }
-
-    private void assertTypeByData(String expected, String filename)
-            throws IOException {
-        try (InputStream stream = TestMimeTypes.class.getResourceAsStream(
-                "/test-documents/" + filename)) {
-            assertNotNull("Test file not found: " + filename, stream);
-            Metadata metadata = new Metadata();
-            assertEquals(expected, repo.detect(stream, metadata).toString());
-        }
-    }
-    
-    private void assertTypeByData(String expected, byte[] data)
-            throws IOException {
-        try (InputStream stream = new ByteArrayInputStream(data)) {
-            Metadata metadata = new Metadata();
-            assertEquals(expected, repo.detect(stream, metadata).toString());
-        }
-    }
-
-    private void assertTypeDetection(String filename, String type)
-            throws IOException {
-        assertTypeDetection(filename, type, type, type);
-    }
-
-    private void assertTypeDetection(String filename, String byName, String byData, 
-            String byNameAndData) throws IOException {
-        assertTypeByName(byName, filename);
-        assertTypeByData(byData, filename);
-        assertTypeByNameAndData(byNameAndData, filename);
-    }
-
-    private void assertTypeByNameAndData(String expected, String filename)
-        throws IOException {
-       assertEquals(expected, getTypeByNameAndData(filename).toString());
-    }
-
-    private MediaType getTypeByNameAndData(String filename) throws IOException {
-        try (InputStream stream = TestMimeTypes.class.getResourceAsStream(
-                "/test-documents/" + filename)) {
-            assertNotNull("Test document not found: " + filename, stream);
-            Metadata metadata = new Metadata();
-            metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
-            return repo.detect(stream, metadata);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
deleted file mode 100644
index 91b054e..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
-
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.XMPDM;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.sax.BodyContentHandler;
-import org.gagravarr.tika.FlacParser;
-import org.gagravarr.tika.OpusParser;
-import org.gagravarr.tika.VorbisParser;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class AutoDetectParserTest {
-    private TikaConfig tika = TikaConfig.getDefaultConfig();
-
-    // Easy to read constants for the MIME types:
-    private static final String RAW        = "application/octet-stream";
-    private static final String EXCEL      = "application/vnd.ms-excel";
-    private static final String HTML       = "text/html; charset=ISO-8859-1";
-    private static final String PDF        = "application/pdf";
-    private static final String POWERPOINT = "application/vnd.ms-powerpoint";
-    private static final String KEYNOTE    = "application/vnd.apple.keynote";
-    private static final String PAGES      = "application/vnd.apple.pages";
-    private static final String NUMBERS    = "application/vnd.apple.numbers";
-    private static final String CHM        = "application/vnd.ms-htmlhelp";
-    private static final String RTF        = "application/rtf";
-    private static final String PLAINTEXT  = "text/plain; charset=ISO-8859-1";
-    private static final String UTF8TEXT   = "text/plain; charset=UTF-8";
-    private static final String WORD       = "application/msword";
-    private static final String XML        = "application/xml";
-    private static final String RSS        = "application/rss+xml";
-    private static final String BMP        = "image/x-ms-bmp";
-    private static final String GIF        = "image/gif";
-    private static final String JPEG       = "image/jpeg";
-    private static final String PNG        = "image/png";
-    private static final String OGG_VORBIS = "audio/vorbis";
-    private static final String OGG_OPUS   = "audio/opus";
-    private static final String OGG_FLAC   = "audio/x-oggflac"; 
-    private static final String FLAC_NATIVE= "audio/x-flac";
-    private static final String OPENOFFICE
-            = "application/vnd.oasis.opendocument.text";
-
-
-    /**
-     * This is where a single test is done.
-     * @param tp the parameters encapsulated in a TestParams instance
-     * @throws IOException
-     */
-    private void assertAutoDetect(TestParams tp) throws Exception {
-        try (InputStream input = AutoDetectParserTest.class.getResourceAsStream(tp.resourceRealName)) {
-            if (input == null) {
-                fail("Could not open stream from specified resource: "
-                        + tp.resourceRealName);
-            }
-            Metadata metadata = new Metadata();
-            metadata.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
-            metadata.set(Metadata.CONTENT_TYPE, tp.statedType);
-            ContentHandler handler = new BodyContentHandler();
-            new AutoDetectParser(tika).parse(input, handler, metadata);
-
-            assertEquals("Bad content type: " + tp,
-                    tp.realType, metadata.get(Metadata.CONTENT_TYPE));
-
-            if (tp.expectedContentFragment != null) {
-                assertTrue("Expected content not found: " + tp,
-                        handler.toString().contains(tp.expectedContentFragment));
-            }
-        }
-    }
-
-    /**
-     * Convenience method -- its sole purpose of existence is to make the
-     * call to it more readable than it would be if a TestParams instance
-     * would need to be instantiated there.
-     *
-     * @param resourceRealName real name of resource
-     * @param resourceStatedName stated name -- will a bad name fool us?
-     * @param realType - the real MIME type
-     * @param statedType - stated MIME type - will a wrong one fool us?
-     * @param expectedContentFragment - something expected in the text
-     * @throws Exception
-     */
-    private void assertAutoDetect(String resourceRealName,
-                                  String resourceStatedName,
-                                  String realType,
-                                  String statedType,
-                                  String expectedContentFragment)
-            throws Exception {
-
-        assertAutoDetect(new TestParams(resourceRealName, resourceStatedName,
-                realType, statedType, expectedContentFragment));
-    }
-
-    private void assertAutoDetect(
-            String resource, String type, String content) throws Exception {
-
-        resource = "/test-documents/" + resource;
-
-        // TODO !!!!  The disabled tests below should work!
-        // The correct MIME type should be determined regardless of the
-        // stated type (ContentType hint) and the stated URL name.
-
-
-        // Try different combinations of correct and incorrect arguments:
-        final String wrongMimeType = RAW;
-        assertAutoDetect(resource, resource, type, type,          content);
-        assertAutoDetect(resource, resource, type, null,          content);
-        assertAutoDetect(resource, resource, type, wrongMimeType, content);
-
-        assertAutoDetect(resource, null, type, type,          content);
-        assertAutoDetect(resource, null, type, null,          content);
-        assertAutoDetect(resource, null, type, wrongMimeType, content);
-
-        final String badResource = "a.xyz";
-        assertAutoDetect(resource, badResource, type, type,          content);
-        assertAutoDetect(resource, badResource, type, null,          content);
-        assertAutoDetect(resource, badResource, type, wrongMimeType, content);
-    }
-
-    @Test
-    public void testKeynote() throws Exception {
-        assertAutoDetect("testKeynote.key", KEYNOTE, "A sample presentation");
-    }
-
-    @Test
-    public void testPages() throws Exception {
-        assertAutoDetect("testPages.pages", PAGES, "Sample pages document");
-    }
-
-    @Test
-    public void testNumbers() throws Exception {
-        assertAutoDetect("testNumbers.numbers", NUMBERS, "Checking Account: 300545668");
-    }
-
-    @Test
-    public void testChm() throws Exception {
-        assertAutoDetect("testChm.chm", CHM, "If you do not specify a window type or a window name, the main window is used.");
-    }
-
-    @Test
-    public void testEpub() throws Exception {
-        assertAutoDetect(
-                "testEPUB.epub", "application/epub+zip",
-                "The previous headings were subchapters");
-    }
-
-    @Test
-    public void testExcel() throws Exception {
-        assertAutoDetect("testEXCEL.xls", EXCEL, "Sample Excel Worksheet");
-    }
-
-    @Test
-    public void testHTML() throws Exception {
-        assertAutoDetect("testHTML.html", HTML, "Test Indexation Html");
-    }
-
-    @Test
-    public void testOpenOffice() throws Exception {
-        assertAutoDetect("testOpenOffice2.odt", OPENOFFICE,
-                "This is a sample Open Office document");
-    }
-
-    @Test
-    public void testPDF() throws Exception {
-        assertAutoDetect("testPDF.pdf", PDF, "Content Analysis Toolkit");
-
-    }
-
-    @Test
-    public void testPowerpoint() throws Exception {
-        assertAutoDetect("testPPT.ppt", POWERPOINT, "Sample Powerpoint Slide");
-    }
-
-    @Test
-    public void testRdfXml() throws Exception {
-        assertAutoDetect("testRDF.rdf", "application/rdf+xml", "");
-    }
-
-    @Test
-    public void testRTF() throws Exception {
-        assertAutoDetect("testRTF.rtf", RTF, "indexation Word");
-    }
-
-    @Test
-    public void testText() throws Exception {
-        assertAutoDetect("testTXT.txt", PLAINTEXT, "indexation de Txt");
-    }
-    
-    @Test
-    public void testTextNonASCIIUTF8() throws Exception {
-        assertAutoDetect("testTXTNonASCIIUTF8.txt", UTF8TEXT, "The quick brown fox jumps over the lazy dog");
-    }
-
-    @Test
-    public void testWord() throws Exception {
-        assertAutoDetect("testWORD.doc", WORD, "Sample Word Document");
-    }
-
-    @Test
-    public void testXML() throws Exception {
-        assertAutoDetect("testXML.xml", XML, "Lius");
-    }
-
-    @Test
-    public void testRss() throws Exception {
-        assertAutoDetect("/test-documents/rsstest.rss", "feed", RSS, "application/rss+xml", "Sample RSS File for Junit test");
-    }
-    
-    @Test
-    public void testImages() throws Exception {
-       assertAutoDetect("testBMP.bmp", BMP, null);
-       assertAutoDetect("testGIF.gif", GIF, null);
-       assertAutoDetect("testJPEG.jpg", JPEG, null);
-       assertAutoDetect("testPNG.png", PNG, null);
-   }
-
-    /**
-     * Make sure that zip bomb attacks are prevented.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
-     */
-    @Test
-    public void testZipBombPrevention() throws Exception {
-        try (InputStream tgz = AutoDetectParserTest.class.getResourceAsStream(
-                "/test-documents/TIKA-216.tgz")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler(-1);
-            new AutoDetectParser(tika).parse(tgz, handler, metadata);
-            fail("Zip bomb was not detected");
-        } catch (TikaException e) {
-            // expected
-        }
-    }
-
-    /**
-     * Make sure XML parse errors don't trigger ZIP bomb detection.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-1322">TIKA-1322</a>
-     */
-    @Test
-    public void testNoBombDetectedForInvalidXml() throws Exception {
-        // create zip with ten empty / invalid XML files, 1.xml .. 10.xml
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        ZipOutputStream zos = new ZipOutputStream(baos);
-        for (int i = 1; i <= 10; i++) {
-            zos.putNextEntry(new ZipEntry(i + ".xml"));
-            zos.closeEntry();
-        }
-        zos.finish();
-        zos.close();
-        new AutoDetectParser(tika).parse(new ByteArrayInputStream(baos.toByteArray()), new BodyContentHandler(-1),
-                new Metadata());
-    }
-
-    /**
-     * Test to ensure that the Ogg Audio parsers (Vorbis, Opus, Flac etc)
-     *  have been correctly included, and are available
-     */
-    @SuppressWarnings("deprecation")
-    @Test
-    public void testOggFlacAudio() throws Exception {
-       // The three test files should all have similar test data
-       String[] testFiles = new String[] {
-             "testVORBIS.ogg", "testFLAC.flac", "testFLAC.oga",
-             "testOPUS.opus"
-       };
-       MediaType[] mediaTypes = new MediaType[] {
-               MediaType.parse(OGG_VORBIS), MediaType.parse(FLAC_NATIVE),
-               MediaType.parse(OGG_FLAC), MediaType.parse(OGG_OPUS)
-       };
-       
-       // Check we can load the parsers, and they claim to do the right things
-       VorbisParser vParser = new VorbisParser();
-       assertNotNull("Parser not found for " + mediaTypes[0], 
-                     vParser.getSupportedTypes(new ParseContext()));
-       
-       FlacParser fParser = new FlacParser();
-       assertNotNull("Parser not found for " + mediaTypes[1], 
-                     fParser.getSupportedTypes(new ParseContext()));
-       assertNotNull("Parser not found for " + mediaTypes[2], 
-                     fParser.getSupportedTypes(new ParseContext()));
-       
-       OpusParser oParser = new OpusParser();
-       assertNotNull("Parser not found for " + mediaTypes[3], 
-                     oParser.getSupportedTypes(new ParseContext()));
-       
-       // Check we found the parser
-       CompositeParser parser = (CompositeParser)tika.getParser();
-       for (MediaType mt : mediaTypes) {
-          assertNotNull("Parser not found for " + mt, parser.getParsers().get(mt) );
-       }
-       
-       // Have each file parsed, and check
-       for (int i=0; i<testFiles.length; i++) {
-           String file = testFiles[i];
-           try (InputStream input = AutoDetectParserTest.class.getResourceAsStream(
-                   "/test-documents/" + file)) {
-               if (input == null) {
-                   fail("Could not find test file " + file);
-               }
-               Metadata metadata = new Metadata();
-               ContentHandler handler = new BodyContentHandler();
-               new AutoDetectParser(tika).parse(input, handler, metadata);
-
-               assertEquals("Incorrect content type for " + file,
-                       mediaTypes[i].toString(), metadata.get(Metadata.CONTENT_TYPE));
-
-               // Check some of the common metadata
-               // Old style metadata
-               assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-               assertEquals("Test Title", metadata.get(Metadata.TITLE));
-               // New style metadata
-               assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-               assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-
-               // Check some of the XMPDM metadata
-               if (!file.endsWith(".opus")) {
-                   assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
-               }
-               assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
-               assertEquals("Stereo", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
-               assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
-
-               // Check some of the text
-               String content = handler.toString();
-               assertTrue(content.contains("Test Title"));
-               assertTrue(content.contains("Test Artist"));
-           }
-       }
-    }
-    
-    /**
-     * Test case for TIKA-514. Provide constructor for AutoDetectParser that has explicit
-     * list of supported parsers.
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-514">TIKA-514</a>
-     */
-    @Test
-    public void testSpecificParserList() throws Exception {
-        AutoDetectParser parser = new AutoDetectParser(new MyDetector(), new MyParser());
-        
-        InputStream is = new ByteArrayInputStream("test".getBytes(UTF_8));
-        Metadata metadata = new Metadata();
-        parser.parse(is, new BodyContentHandler(), metadata, new ParseContext());
-        
-        assertEquals("value", metadata.get("MyParser"));
-    }
-
-    private static final MediaType MY_MEDIA_TYPE = new MediaType("application", "x-myparser");
-    
-    /**
-     * A test detector which always returns the type supported
-     *  by the test parser
-     */
-    @SuppressWarnings("serial")
-    private static class MyDetector implements Detector {
-        public MediaType detect(InputStream input, Metadata metadata) throws IOException {
-            return MY_MEDIA_TYPE;
-        }
-    }
-    
-    @SuppressWarnings("serial")
-    private static class MyParser extends AbstractParser {
-        public Set<MediaType> getSupportedTypes(ParseContext context) {
-            Set<MediaType> supportedTypes = new HashSet<MediaType>();
-            supportedTypes.add(MY_MEDIA_TYPE);
-            return supportedTypes;
-        }
-
-        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) {
-            metadata.add("MyParser", "value");
-        }
-
-    }
-    
-    /**
-     * Minimal class to encapsulate all parameters -- the main reason for
-     * its existence is to aid in debugging via its toString() method.
-     *
-     * Getters and setters intentionally not provided.
-     */
-    private static class TestParams {
-
-        public String resourceRealName;
-        public String resourceStatedName;
-        public String realType;
-        public String statedType;
-        public String expectedContentFragment;
-
-
-        private TestParams(String resourceRealName,
-                           String resourceStatedName,
-                           String realType,
-                           String statedType,
-                           String expectedContentFragment) {
-            this.resourceRealName = resourceRealName;
-            this.resourceStatedName = resourceStatedName;
-            this.realType = realType;
-            this.statedType = statedType;
-            this.expectedContentFragment = expectedContentFragment;
-        }
-
-
-        /**
-         * Produces a string like the following:
-         *
-         * <pre>
-         * Test parameters:
-         *   resourceRealName        = /test-documents/testEXCEL.xls
-         *   resourceStatedName      = null
-         *   realType                = application/vnd.ms-excel
-         *   statedType              = null
-         *   expectedContentFragment = Sample Excel Worksheet
-         * </pre>
-         */
-        public String toString() {
-            return "Test parameters:\n"
-                + "  resourceRealName        = " + resourceRealName + "\n"
-                + "  resourceStatedName      = " + resourceStatedName + "\n"
-                + "  realType                = " + realType + "\n"
-                + "  statedType              = " + statedType + "\n"
-                + "  expectedContentFragment = " + expectedContentFragment + "\n";
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
deleted file mode 100644
index 68edfc2..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.utils.CommonsDigester;
-import org.junit.Test;
-
-
-public class DigestingParserTest extends TikaTest {
-
-    private final static String P = TikaCoreProperties.TIKA_META_PREFIX+
-            "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
-
-    private final int UNLIMITED = 1000000;//well, not really, but longer than input file
-    private final Parser p = new AutoDetectParser();
-
-    @Test
-    public void testBasic() throws Exception {
-        Map<CommonsDigester.DigestAlgorithm, String> expected =
-                new HashMap<CommonsDigester.DigestAlgorithm, String>();
-
-        expected.put(CommonsDigester.DigestAlgorithm.MD2,"d768c8e27b0b52c6eaabfaa7122d1d4f");
-        expected.put(CommonsDigester.DigestAlgorithm.MD5,"59f626e09a8c16ab6dbc2800c685f772");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA1,"7a1f001d163ac90d8ea54c050faf5a38079788a6");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA256,"c4b7fab030a8b6a9d6691f6699ac8e6f" +
-                                                            "82bc53764a0f1430d134ae3b70c32654");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA384,"ebe368b9326fef44408290724d187553"+
-                                                            "8b8a6923fdf251ddab72c6e4b5d54160" +
-                                                            "9db917ba4260d1767995a844d8d654df");
-        expected.put(CommonsDigester.DigestAlgorithm.SHA512,"ee46d973ee1852c018580c242955974d"+
-                                                            "da4c21f36b54d7acd06fcf68e974663b"+
-                                                            "fed1d256875be58d22beacf178154cc3"+
-                                                            "a1178cb73443deaa53aa0840324708bb");
-
-        //test each one
-        for (CommonsDigester.DigestAlgorithm algo : CommonsDigester.DigestAlgorithm.values()) {
-            Metadata m = new Metadata();
-            XMLResult xml = getXML("test_recursive_embedded.docx",
-                    new DigestingParser(p, new CommonsDigester(UNLIMITED, algo)), m);
-            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
-        }
-
-
-        //test comma separated
-        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse("md5,sha256,sha384,sha512");
-        Metadata m = new Metadata();
-        XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(UNLIMITED, algos)), m);
-        for (CommonsDigester.DigestAlgorithm algo : new CommonsDigester.DigestAlgorithm[]{
-                CommonsDigester.DigestAlgorithm.MD5,
-                CommonsDigester.DigestAlgorithm.SHA256,
-                CommonsDigester.DigestAlgorithm.SHA384,
-                CommonsDigester.DigestAlgorithm.SHA512}) {
-            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
-        }
-
-        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.MD2.toString()));
-        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.SHA1.toString()));
-
-    }
-
-    @Test
-    public void testLimitedRead() throws Exception {
-        CommonsDigester.DigestAlgorithm algo = CommonsDigester.DigestAlgorithm.MD5;
-        int limit = 100;
-        byte[] bytes = new byte[limit];
-        InputStream is = getResourceAsStream("/test-documents/test_recursive_embedded.docx");
-        is.read(bytes, 0, limit);
-        is.close();
-        Metadata m = new Metadata();
-        try {
-            XMLResult xml = getXML(TikaInputStream.get(bytes),
-                    new DigestingParser(p, new CommonsDigester(100, algo)), m);
-        } catch (TikaException e) {
-            //thrown because this is just a file fragment
-            assertContains("Unexpected RuntimeException from org.apache.tika.parser.microsoft.ooxml.OOXMLParser",
-                    e.getMessage());
-        }
-        String expectedMD5 = m.get(P+"MD5");
-
-        m = new Metadata();
-        XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(100, algo)), m);
-        assertEquals(expectedMD5, m.get(P+"MD5"));
-    }
-
-    @Test
-    public void testReset() throws Exception {
-        String expectedMD5 = "1643c2cef21e36720c54f4f6cb3349d0";
-        Metadata m = new Metadata();
-        XMLResult xml = getXML("test_recursive_embedded.docx",
-                new DigestingParser(p, new CommonsDigester(100, CommonsDigester.DigestAlgorithm.MD5)), m);
-        assertEquals(expectedMD5, m.get(P+"MD5"));
-    }
-
-    @Test
-    public void testNegativeMaxMarkLength() throws Exception {
-        Metadata m = new Metadata();
-        boolean ex = false;
-        try {
-            XMLResult xml = getXML("test_recursive_embedded.docx",
-                    new DigestingParser(p, new CommonsDigester(-1, CommonsDigester.DigestAlgorithm.MD5)), m);
-        } catch (IllegalArgumentException e) {
-            ex = true;
-        }
-        assertTrue("Exception not thrown", ex);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
deleted file mode 100644
index 2fcd1c3..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.io.Reader;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.junit.Test;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-
-public class ParsingReaderTest {
-
-    @Test
-    public void testPlainText() throws Exception {
-        String data = "test content";
-        InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
-        Reader reader = new ParsingReader(stream, "test.txt");
-        assertEquals('t', reader.read());
-        assertEquals('e', reader.read());
-        assertEquals('s', reader.read());
-        assertEquals('t', reader.read());
-        assertEquals(' ', reader.read());
-        assertEquals('c', reader.read());
-        assertEquals('o', reader.read());
-        assertEquals('n', reader.read());
-        assertEquals('t', reader.read());
-        assertEquals('e', reader.read());
-        assertEquals('n', reader.read());
-        assertEquals('t', reader.read());
-        assertEquals('\n', reader.read());
-        assertEquals(-1, reader.read());
-        reader.close();
-        assertEquals(-1, stream.read());
-    }
-
-    @Test
-    public void testXML() throws Exception {
-        String data = "<p>test <span>content</span></p>";
-        InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
-        Reader reader = new ParsingReader(stream, "test.xml");
-        assertEquals(' ', (char) reader.read());
-        assertEquals('t', (char) reader.read());
-        assertEquals('e', (char) reader.read());
-        assertEquals('s', (char) reader.read());
-        assertEquals('t', (char) reader.read());
-        assertEquals(' ', (char) reader.read());
-        assertEquals(' ', (char) reader.read());
-        assertEquals('c', (char) reader.read());
-        assertEquals('o', (char) reader.read());
-        assertEquals('n', (char) reader.read());
-        assertEquals('t', (char) reader.read());
-        assertEquals('e', (char) reader.read());
-        assertEquals('n', (char) reader.read());
-        assertEquals('t', (char) reader.read());
-        assertEquals('\n', (char) reader.read());
-        assertEquals(-1, reader.read());
-        reader.close();
-        assertEquals(-1, stream.read());
-    }
-
-    /**
-     * Test case for TIKA-203
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-203">TIKA-203</a>
-     */
-    @Test
-    public void testMetadata() throws Exception {
-        Metadata metadata = new Metadata();
-        InputStream stream = ParsingReaderTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL.xls");
-        try (Reader reader = new ParsingReader(
-                new AutoDetectParser(), stream, metadata, new ParseContext())) {
-            // Metadata should already be available
-            assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
-            // Check that the internal buffering isn't broken
-            assertEquals('F', (char) reader.read());
-            assertEquals('e', (char) reader.read());
-            assertEquals('u', (char) reader.read());
-            assertEquals('i', (char) reader.read());
-            assertEquals('l', (char) reader.read());
-            assertEquals('1', (char) reader.read());
-        }
-    }
-
-}


[06/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/main/appended-resources/META-INF/LICENSE
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/appended-resources/META-INF/LICENSE b/tika-parsers/src/main/appended-resources/META-INF/LICENSE
deleted file mode 100644
index bd54624..0000000
--- a/tika-parsers/src/main/appended-resources/META-INF/LICENSE
+++ /dev/null
@@ -1,94 +0,0 @@
-APACHE TIKA SUBCOMPONENTS
-
-Apache Tika includes a number of subcomponents with separate copyright notices
-and license terms. Your use of these subcomponents is subject to the terms and
-conditions of the following licenses.
-
-Charset detection code from ICU4J (http://site.icu-project.org/)
-
-    Copyright (c) 1995-2009 International Business Machines Corporation
-    and others
-
-    All rights reserved.
-
-    Permission is hereby granted, free of charge, to any person obtaining
-    a copy of this software and associated documentation files (the
-    "Software"), to deal in the Software without restriction, including
-    without limitation the rights to use, copy, modify, merge, publish,
-    distribute, and/or sell copies of the Software, and to permit persons
-    to whom the Software is furnished to do so, provided that the above
-    copyright notice(s) and this permission notice appear in all copies
-    of the Software and that both the above copyright notice(s) and this
-    permission notice appear in supporting documentation.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-    IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
-    BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
-    OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-    WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-    SOFTWARE.
-
-    Except as contained in this notice, the name of a copyright holder shall
-    not be used in advertising or otherwise to promote the sale, use or other
-    dealings in this Software without prior written authorization of the
-    copyright holder.
-
-
-JUnRAR (https://github.com/edmund-wagner/junrar/)
-
-      JUnRAR is based on the UnRAR tool, and covered by the same license
-      It was formerly available from http://java-unrar.svn.sourceforge.net/
-
- ******    *****   ******   UnRAR - free utility for RAR archives
- **   **  **   **  **   **  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ******   *******  ******    License for use and distribution of
- **   **  **   **  **   **   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **   **  **   **  **   **         FREE portable version
-                                   ~~~~~~~~~~~~~~~~~~~~~
-
-      The source code of UnRAR utility is freeware. This means:
-
-   1. All copyrights to RAR and the utility UnRAR are exclusively
-      owned by the author - Alexander Roshal.
-
-   2. The UnRAR sources may be used in any software to handle RAR
-      archives without limitations free of charge, but cannot be used
-      to re-create the RAR compression algorithm, which is proprietary.
-      Distribution of modified UnRAR sources in separate form or as a
-      part of other software is permitted, provided that it is clearly
-      stated in the documentation and source comments that the code may
-      not be used to develop a RAR (WinRAR) compatible archiver.
-
-   3. The UnRAR utility may be freely distributed. It is allowed
-      to distribute UnRAR inside of other software packages.
-
-   4. THE RAR ARCHIVER AND THE UnRAR UTILITY ARE DISTRIBUTED "AS IS".
-      NO WARRANTY OF ANY KIND IS EXPRESSED OR IMPLIED.  YOU USE AT 
-      YOUR OWN RISK. THE AUTHOR WILL NOT BE LIABLE FOR DATA LOSS, 
-      DAMAGES, LOSS OF PROFITS OR ANY OTHER KIND OF LOSS WHILE USING
-      OR MISUSING THIS SOFTWARE.
-
-   5. Installing and using the UnRAR utility signifies acceptance of
-      these terms and conditions of the license.
-
-   6. If you don't agree with terms of the license you must remove
-      UnRAR files from your storage devices and cease to use the
-      utility.
-
-      Thank you for your interest in RAR and UnRAR.  Alexander L. Roshal
-
-Sqlite (included in the "provided" org.xerial's sqlite-jdbc)
-    Sqlite is in the Public Domain.  For details
-    see: https://www.sqlite.org/copyright.html
-
-Two photos in test-documents (testWebp_Alpha_Lossy.webp and testWebp_Alpha_Lossless.webp)
-    are in the public domain.  These files were retrieved from:
-    https://github.com/drewnoakes/metadata-extractor-images/tree/master/webp
-    These photos are also available here:
-    https://developers.google.com/speed/webp/gallery2#webp_links
-    Credits for the photo:
-    "Free Stock Photo in High Resolution - Yellow Rose 3 - Flowers"
-    Image Author: Jon Sullivan

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
deleted file mode 100644
index a884d3a..0000000
--- a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.internal;
-
-import java.util.Properties;
-
-import org.apache.tika.detect.DefaultDetector;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.parser.DefaultParser;
-import org.apache.tika.parser.Parser;
-import org.osgi.framework.BundleActivator;
-import org.osgi.framework.BundleContext;
-import org.osgi.framework.ServiceRegistration;
-
-public class Activator implements BundleActivator {
-
-    private ServiceRegistration detectorService;
-
-    private ServiceRegistration parserService;
-
-    @Override
-    public void start(BundleContext context) throws Exception {
-        detectorService = context.registerService(
-                Detector.class.getName(),
-                new DefaultDetector(Activator.class.getClassLoader()),
-                new Properties());
-        Parser parser = new DefaultParser(Activator.class.getClassLoader());
-        parserService = context.registerService(
-                Parser.class.getName(),
-                parser,
-                new Properties());
-    }
-
-    @Override
-    public void stop(BundleContext context) throws Exception {
-        parserService.unregister();
-        detectorService.unregister();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
deleted file mode 100644
index a064156..0000000
--- a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
+++ /dev/null
@@ -1,299 +0,0 @@
-package org.apache.tika.parser.utils;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.ParseContext;
-
-/**
- * Implementation of {@link org.apache.tika.parser.DigestingParser.Digester}
- * that relies on commons.codec.digest.DigestUtils to calculate digest hashes.
- * <p>
- * This digester tries to use the regular mark/reset protocol on the InputStream.
- * However, this wraps an internal BoundedInputStream, and if the InputStream
- * is not fully read, then this will reset the stream and
- * spool the InputStream to disk (via TikaInputStream) and then digest the file.
- * <p>
- * If a TikaInputStream is passed in and it has an underlying file that is longer
- * than the {@link #markLimit}, then this digester digests the file directly.
- *
- */
-public class CommonsDigester implements DigestingParser.Digester {
-
-    public enum DigestAlgorithm {
-        //those currently available in commons.digest
-        MD2,
-        MD5,
-        SHA1,
-        SHA256,
-        SHA384,
-        SHA512;
-
-        String getMetadataKey() {
-            return TikaCoreProperties.TIKA_META_PREFIX+
-                    "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER+this.toString();
-        }
-    }
-
-    private final List<DigestAlgorithm> algorithms = new ArrayList<DigestAlgorithm>();
-    private final int markLimit;
-
-    public CommonsDigester(int markLimit, DigestAlgorithm... algorithms) {
-        Collections.addAll(this.algorithms, algorithms);
-        if (markLimit < 0) {
-            throw new IllegalArgumentException("markLimit must be >= 0");
-        }
-        this.markLimit = markLimit;
-    }
-
-    @Override
-    public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
-        InputStream tis = TikaInputStream.get(is);
-        long sz = -1;
-        if (((TikaInputStream)tis).hasFile()) {
-            sz = ((TikaInputStream)tis).getLength();
-        }
-        //if the file is definitely a file,
-        //and its size is greater than its mark limit,
-        //just digest the underlying file.
-        if (sz > markLimit) {
-            digestFile(((TikaInputStream)tis).getFile(), m);
-            return;
-        }
-
-        //try the usual mark/reset stuff.
-        //however, if you actually hit the bound,
-        //then stop and spool to file via TikaInputStream
-        SimpleBoundedInputStream bis = new SimpleBoundedInputStream(markLimit, tis);
-        boolean finishedStream = false;
-        for (DigestAlgorithm algorithm : algorithms) {
-            bis.mark(markLimit + 1);
-            finishedStream = digestEach(algorithm, bis, m);
-            bis.reset();
-            if (!finishedStream) {
-                break;
-            }
-        }
-        if (!finishedStream) {
-            digestFile(((TikaInputStream)tis).getFile(), m);
-        }
-    }
-
-    private void digestFile(File f, Metadata m) throws IOException {
-        for (DigestAlgorithm algorithm : algorithms) {
-            InputStream is = new FileInputStream(f);
-            try {
-                digestEach(algorithm, is, m);
-            } finally {
-                IOUtils.closeQuietly(is);
-            }
-        }
-    }
-
-    /**
-     *
-     * @param algorithm algo to use
-     * @param is input stream to read from
-     * @param metadata metadata for reporting the digest
-     * @return whether or not this finished the input stream
-     * @throws IOException
-     */
-    private boolean digestEach(DigestAlgorithm algorithm,
-                            InputStream is, Metadata metadata) throws IOException {
-        String digest = null;
-        try {
-            switch (algorithm) {
-                case MD2:
-                    digest = DigestUtils.md2Hex(is);
-                    break;
-                case MD5:
-                    digest = DigestUtils.md5Hex(is);
-                    break;
-                case SHA1:
-                    digest = DigestUtils.sha1Hex(is);
-                    break;
-                case SHA256:
-                    digest = DigestUtils.sha256Hex(is);
-                    break;
-                case SHA384:
-                    digest = DigestUtils.sha384Hex(is);
-                    break;
-                case SHA512:
-                    digest = DigestUtils.sha512Hex(is);
-                    break;
-                default:
-                    throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algorithm.toString());
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
-            //swallow, or should we throw this?
-        }
-        if (is instanceof SimpleBoundedInputStream) {
-            if (((SimpleBoundedInputStream)is).hasHitBound()) {
-                return false;
-            }
-        }
-        metadata.set(algorithm.getMetadataKey(), digest);
-        return true;
-    }
-
-    /**
-     *
-     * @param s comma-delimited (no space) list of algorithms to use: md5,sha256
-     * @return
-     */
-    public static DigestAlgorithm[] parse(String s) {
-        assert(s != null);
-
-        List<DigestAlgorithm> ret = new ArrayList<DigestAlgorithm>();
-        for (String algoString : s.split(",")) {
-            String uc = algoString.toUpperCase(Locale.ROOT);
-            if (uc.equals(DigestAlgorithm.MD2.toString())) {
-                ret.add(DigestAlgorithm.MD2);
-            } else if (uc.equals(DigestAlgorithm.MD5.toString())) {
-                ret.add(DigestAlgorithm.MD5);
-            } else if (uc.equals(DigestAlgorithm.SHA1.toString())) {
-                ret.add(DigestAlgorithm.SHA1);
-            } else if (uc.equals(DigestAlgorithm.SHA256.toString())) {
-                ret.add(DigestAlgorithm.SHA256);
-            } else if (uc.equals(DigestAlgorithm.SHA384.toString())) {
-                ret.add(DigestAlgorithm.SHA384);
-            } else if (uc.equals(DigestAlgorithm.SHA512.toString())) {
-                ret.add(DigestAlgorithm.SHA512);
-            } else {
-                StringBuilder sb = new StringBuilder();
-                int i = 0;
-                for (DigestAlgorithm algo : DigestAlgorithm.values()) {
-                    if (i++ > 0) {
-                        sb.append(", ");
-                    }
-                    sb.append(algo.toString());
-                }
-                throw new IllegalArgumentException("Couldn't match " + s + " with any of: " + sb.toString());
-            }
-        }
-        return ret.toArray(new DigestAlgorithm[ret.size()]);
-    }
-
-    /**
-     * Very slight modification of Commons' BoundedInputStream
-     * so that we can figure out if this hit the bound or not.
-     */
-    private class SimpleBoundedInputStream extends InputStream {
-        private final static int EOF = -1;
-        private final long max;
-        private final InputStream in;
-        private long pos;
-        boolean hitBound = false;
-
-        private SimpleBoundedInputStream(long max, InputStream in) {
-            this.max = max;
-            this.in = in;
-        }
-
-        @Override
-        public int read() throws IOException {
-            if (max >= 0 && pos >= max) {
-                hitBound = true;
-                return EOF;
-            }
-            final int result = in.read();
-            pos++;
-            return result;
-        }
-
-        /**
-         * Invokes the delegate's <code>read(byte[])</code> method.
-         * @param b the buffer to read the bytes into
-         * @return the number of bytes read or -1 if the end of stream or
-         * the limit has been reached.
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public int read(final byte[] b) throws IOException {
-            return this.read(b, 0, b.length);
-        }
-
-        /**
-         * Invokes the delegate's <code>read(byte[], int, int)</code> method.
-         * @param b the buffer to read the bytes into
-         * @param off The start offset
-         * @param len The number of bytes to read
-         * @return the number of bytes read or -1 if the end of stream or
-         * the limit has been reached.
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public int read(final byte[] b, final int off, final int len) throws IOException {
-            if (max>=0 && pos>=max) {
-                return EOF;
-            }
-            final long maxRead = max>=0 ? Math.min(len, max-pos) : len;
-            final int bytesRead = in.read(b, off, (int)maxRead);
-
-            if (bytesRead==EOF) {
-                return EOF;
-            }
-
-            pos+=bytesRead;
-            return bytesRead;
-        }
-
-        /**
-         * Invokes the delegate's <code>skip(long)</code> method.
-         * @param n the number of bytes to skip
-         * @return the actual number of bytes skipped
-         * @throws IOException if an I/O error occurs
-         */
-        @Override
-        public long skip(final long n) throws IOException {
-            final long toSkip = max>=0 ? Math.min(n, max-pos) : n;
-            final long skippedBytes = in.skip(toSkip);
-            pos+=skippedBytes;
-            return skippedBytes;
-        }
-
-        @Override
-        public void reset() throws IOException {
-            in.reset();
-        }
-
-        @Override
-        public void mark(int readLimit) {
-            in.mark(readLimit);
-        }
-
-        public boolean hasHitBound() {
-            return hitBound;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java b/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
deleted file mode 100644
index ddd671d..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.junit.Before;
-import org.junit.Test;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * Junit test class for Tika {@link Parser}s.
- */
-public class TestParsers extends TikaTest {
-
-    private TikaConfig tc;
-
-    private Tika tika;
-
-    @Before
-    public void setUp() throws Exception {
-        tc = TikaConfig.getDefaultConfig();
-        tika = new Tika(tc);
-    }
-
-    @Test
-    public void testWORDxtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testWORD.doc");
-        Parser parser = tika.getParser();
-        Metadata metadata = new Metadata();
-        try (InputStream stream = new FileInputStream(file)) {
-            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-        }
-        assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
-    }
-
-    @Test
-    public void testEXCELExtraction() throws Exception {
-        final String expected = "Numbers and their Squares";
-        File file = getResourceAsFile("/test-documents/testEXCEL.xls");
-        String s1 = tika.parseToString(file);
-        assertTrue("Text does not contain '" + expected + "'", s1
-                .contains(expected));
-        Parser parser = tika.getParser();
-        Metadata metadata = new Metadata();
-        try (InputStream stream = new FileInputStream(file)) {
-            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-        }
-        assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
-    }
-
-    @Test
-    public void testOptionalHyphen() throws Exception {
-        String[] extensions =
-                new String[] { "ppt", "pptx", "doc", "docx", "rtf", "pdf"};
-        for (String extension : extensions) {
-            File file = getResourceAsFile("/test-documents/testOptionalHyphen." + extension);
-            String content = tika.parseToString(file);
-            assertTrue("optional hyphen was not handled for '" + extension + "' file type: " + content,
-                       content.contains("optionalhyphen") ||
-                       content.contains("optional\u00adhyphen") ||   // soft hyphen
-                       content.contains("optional\u200bhyphen") ||   // zero width space
-                       content.contains("optional\u2027"));          // hyphenation point
-            
-        }
-    }
-
-    private void verifyComment(String extension, String fileName) throws Exception {
-        File file = getResourceAsFile("/test-documents/" + fileName + "." + extension);
-        String content = tika.parseToString(file);
-        assertTrue(extension + ": content=" + content + " did not extract text",
-                   content.contains("Here is some text"));
-        assertTrue(extension + ": content=" + content + " did not extract comment",
-                   content.contains("Here is a comment"));
-    }
-
-    @Test
-    public void testComment() throws Exception {
-        final String[] extensions = new String[] {"ppt", "pptx", "doc", 
-            "docx", "xls", "xlsx", "pdf", "rtf"};
-        for(String extension : extensions) {
-            verifyComment(extension, "testComment");
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
deleted file mode 100644
index 2125888..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.config;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.apache.tika.detect.CompositeDetector;
-import org.apache.tika.detect.DefaultDetector;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.detect.EmptyDetector;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.mbox.OutlookPSTParser;
-import org.apache.tika.parser.microsoft.POIFSContainerDetector;
-import org.apache.tika.parser.pkg.ZipContainerDetector;
-import org.junit.Test;
-
-/**
- * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link TikaConfigTest} can't do due to a need for the
- *  full set of detectors
- */
-public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
-    @Test
-    public void testDetectorExcludeFromDefault() throws Exception {
-        TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        CompositeDetector detector = (CompositeDetector)config.getDetector();
-        
-        // Should be wrapping two detectors
-        assertEquals(2, detector.getDetectors().size());
-
-        
-        // First should be DefaultDetector, second Empty, that order
-        assertEquals(DefaultDetector.class, detector.getDetectors().get(0).getClass());
-        assertEquals(EmptyDetector.class,   detector.getDetectors().get(1).getClass());
-        
-        
-        // Get the DefaultDetector from the config
-        DefaultDetector confDetector = (DefaultDetector)detector.getDetectors().get(0);
-        
-        // Get a fresh "default" DefaultParser
-        DefaultDetector normDetector = new DefaultDetector(config.getMimeRepository());
-        
-        
-        // The default one will offer the Zip and POIFS detectors
-        assertDetectors(normDetector, true, true);
-        
-        
-        // The one from the config won't, as we excluded those
-        assertDetectors(confDetector, false, false);
-    }
-    
-    /**
-     * TIKA-1708 - If the Zip detector is disabled, either explicitly,
-     *  or via giving a list of detectors that it isn't part of, ensure
-     *  that detection of PST files still works
-     */
-    @Test
-    public void testPSTDetectionWithoutZipDetector() throws Exception {
-        // Check the one with an exclude
-        TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
-        assertNotNull(configWX.getParser());
-        assertNotNull(configWX.getDetector());
-        CompositeDetector detectorWX = (CompositeDetector)configWX.getDetector();
-
-        // Check it has the POIFS one, but not the zip one
-        assertDetectors(detectorWX, true, false);
-        
-        
-        // Check the one with an explicit list
-        TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
-        assertNotNull(configCL.getParser());
-        assertNotNull(configCL.getDetector());
-        CompositeDetector detectorCL = (CompositeDetector)configCL.getDetector();
-        assertEquals(2, detectorCL.getDetectors().size());
-        
-        // Check it also has the POIFS one, but not the zip one
-        assertDetectors(detectorCL, true, false);
-        
-        
-        // Check that both detectors have a mimetypes with entries
-        assertTrue("Not enough mime types: " + configWX.getMediaTypeRegistry().getTypes().size(),
-                   configWX.getMediaTypeRegistry().getTypes().size() > 100);
-        assertTrue("Not enough mime types: " + configCL.getMediaTypeRegistry().getTypes().size(),
-                   configCL.getMediaTypeRegistry().getTypes().size() > 100);
-        
-        
-        // Now check they detect PST files correctly
-        TikaInputStream stream = TikaInputStream.get(
-                getResourceAsFile("/test-documents/testPST.pst"));
-        assertEquals(
-                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
-                detectorWX.detect(stream, new Metadata())
-        );
-        assertEquals(
-                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
-                detectorCL.detect(stream, new Metadata())
-        );
-    }
-    
-    private void assertDetectors(CompositeDetector detector, boolean shouldHavePOIFS,
-                                 boolean shouldHaveZip) {
-        boolean hasZip = false;
-        boolean hasPOIFS = false;
-        for (Detector d : detector.getDetectors()) {
-            if (d instanceof ZipContainerDetector) {
-                if (shouldHaveZip) {
-                    hasZip = true;
-                } else {
-                    fail("Shouldn't have the ZipContainerDetector from config");
-                }
-            }
-            if (d instanceof POIFSContainerDetector) {
-                if (shouldHavePOIFS) {
-                    hasPOIFS = true;
-                } else {
-                    fail("Shouldn't have the POIFSContainerDetector from config");
-                }
-            }
-        }
-        if (shouldHavePOIFS) assertTrue("Should have the POIFSContainerDetector", hasPOIFS);
-        if (shouldHaveZip)   assertTrue("Should have the ZipContainerDetector", hasZip);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
deleted file mode 100644
index 2acd358..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.config;
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.apache.tika.TikaTest.assertNotContained;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.util.List;
-
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.CompositeParser;
-import org.apache.tika.parser.DefaultParser;
-import org.apache.tika.parser.EmptyParser;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.parser.executable.ExecutableParser;
-import org.apache.tika.parser.xml.XMLParser;
-import org.junit.Test;
-
-/**
- * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link TikaConfigTest} can't do due to a need for the
- *  full set of parsers
- */
-public class TikaParserConfigTest extends AbstractTikaConfigTest {
-    @Test
-    public void testMimeExcludeInclude() throws Exception {
-        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        Parser parser = config.getParser();
-        
-        MediaType PDF = MediaType.application("pdf");
-        MediaType JPEG = MediaType.image("jpeg");
-        
-        
-        // Has two parsers
-        assertEquals(CompositeParser.class, parser.getClass());
-        CompositeParser cParser = (CompositeParser)parser;
-        assertEquals(2, cParser.getAllComponentParsers().size());
-        
-        // Both are decorated
-        assertTrue(cParser.getAllComponentParsers().get(0) instanceof ParserDecorator);
-        assertTrue(cParser.getAllComponentParsers().get(1) instanceof ParserDecorator);
-        ParserDecorator p0 = (ParserDecorator)cParser.getAllComponentParsers().get(0);
-        ParserDecorator p1 = (ParserDecorator)cParser.getAllComponentParsers().get(1);
-        
-        
-        // DefaultParser will be wrapped with excludes
-        assertEquals(DefaultParser.class, p0.getWrappedParser().getClass());
-        
-        assertNotContained(PDF, p0.getSupportedTypes(context));
-        assertContains(PDF, p0.getWrappedParser().getSupportedTypes(context));
-        assertNotContained(JPEG, p0.getSupportedTypes(context));
-        assertContains(JPEG, p0.getWrappedParser().getSupportedTypes(context));
-        
-        
-        // Will have an empty parser for PDF
-        assertEquals(EmptyParser.class, p1.getWrappedParser().getClass());
-        assertEquals(1, p1.getSupportedTypes(context).size());
-        assertContains(PDF, p1.getSupportedTypes(context));
-        assertNotContained(PDF, p1.getWrappedParser().getSupportedTypes(context));
-    }
-    
-    @Test
-    public void testParserExcludeFromDefault() throws Exception {
-        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        CompositeParser parser = (CompositeParser)config.getParser();
-        
-        MediaType PE_EXE = MediaType.application("x-msdownload");
-        MediaType ELF = MediaType.application("x-elf");
-        
-        
-        // Get the DefaultParser from the config
-        ParserDecorator confWrappedParser = (ParserDecorator)parser.getParsers().get(MediaType.APPLICATION_XML);
-        assertNotNull(confWrappedParser);
-        DefaultParser confParser = (DefaultParser)confWrappedParser.getWrappedParser();
-        
-        // Get a fresh "default" DefaultParser
-        DefaultParser normParser = new DefaultParser(config.getMediaTypeRegistry());
-        
-        
-        // The default one will offer the Executable Parser
-        assertContains(PE_EXE, normParser.getSupportedTypes(context));
-        assertContains(ELF, normParser.getSupportedTypes(context));
-        
-        boolean hasExec = false;
-        for (Parser p : normParser.getParsers().values()) {
-            if (p instanceof ExecutableParser) {
-                hasExec = true;
-                break;
-            }
-        }
-        assertTrue(hasExec);
-        
-        
-        // The one from the config won't
-        assertNotContained(PE_EXE, confParser.getSupportedTypes(context));
-        assertNotContained(ELF, confParser.getSupportedTypes(context));
-        
-        for (Parser p : confParser.getParsers().values()) {
-            if (p instanceof ExecutableParser)
-                fail("Shouldn't have the Executable Parser from config");
-        }
-    }
-    /**
-     * TIKA-1558 It should be possible to exclude Parsers from being picked up by
-     * DefaultParser.
-     */
-    @Test
-    public void defaultParserBlacklist() throws Exception {
-        TikaConfig config = new TikaConfig();
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        CompositeParser cp = (CompositeParser) config.getParser();
-        List<Parser> parsers = cp.getAllComponentParsers();
-
-        boolean hasXML = false;
-        for (Parser p : parsers) {
-            if (p instanceof XMLParser) {
-                hasXML = true;
-                break;
-            }
-        }
-        assertTrue("Default config should include an XMLParser.", hasXML);
-
-        // This custom TikaConfig should exclude XMLParser and all of its subclasses.
-        config = getConfig("TIKA-1558-blacklistsub.xml");
-        cp = (CompositeParser) config.getParser();
-        parsers = cp.getAllComponentParsers();
-
-        for (Parser p : parsers) {
-            if (p instanceof XMLParser)
-                fail("Custom config should not include an XMLParser (" + p.getClass() + ").");
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
deleted file mode 100644
index 71af206..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.config;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import org.apache.tika.language.translate.DefaultTranslator;
-import org.apache.tika.language.translate.EmptyTranslator;
-import org.junit.Test;
-
-/**
- * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link TikaConfigTest} can't do due to a need for the
- *  full set of translators
- */
-public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
-    @Test
-    public void testDefaultBehaviour() throws Exception {
-        TikaConfig config = TikaConfig.getDefaultConfig();
-        assertNotNull(config.getTranslator());
-        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
-    }
-    
-    @Test
-    public void testRequestsDefault() throws Exception {
-        TikaConfig config = getConfig("TIKA-1702-translator-default.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        assertNotNull(config.getTranslator());
-        
-        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
-    }
-    
-    @Test
-    public void testRequestsEmpty() throws Exception {
-        TikaConfig config = getConfig("TIKA-1702-translator-empty.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        assertNotNull(config.getTranslator());
-        
-        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
-    }
-    
-    /**
-     * Currently, Translators don't support Composites, so
-     *  if multiple translators are given, only the first wins
-     */
-    @Test
-    public void testRequestsMultiple() throws Exception {
-        TikaConfig config = getConfig("TIKA-1702-translator-empty-default.xml");
-        assertNotNull(config.getParser());
-        assertNotNull(config.getDetector());
-        assertNotNull(config.getTranslator());
-        
-        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
deleted file mode 100644
index 5787408..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeTypes;
-import org.junit.Test;
-
-/**
- * Junit test class for {@link ContainerAwareDetector}
- */
-public class TestContainerAwareDetector {
-    private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
-    private final MimeTypes mimeTypes = tikaConfig.getMimeRepository();
-    private final Detector detector = new DefaultDetector(mimeTypes);
-
-    private void assertTypeByData(String file, String type) throws Exception {
-       assertTypeByNameAndData(file, null, type);
-    }
-    private void assertTypeByNameAndData(String file, String type) throws Exception {
-       assertTypeByNameAndData(file, file, type);
-    }
-    private void assertType(String file, String byData, String byNameAndData) throws Exception {
-       assertTypeByData(file, byData);
-       assertTypeByNameAndData(file, byNameAndData);
-    }
-    private void assertTypeByNameAndData(String dataFile, String name, String type) throws Exception {
-        assertTypeByNameAndData(dataFile, name, type, null);
-    }
-    private void assertTypeByNameAndData(String dataFile, String name, String typeFromDetector, String typeFromMagic) throws Exception {
-        try (TikaInputStream stream = TikaInputStream.get(
-                TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile))) {
-            Metadata m = new Metadata();
-            if (name != null)
-                m.add(Metadata.RESOURCE_NAME_KEY, name);
-
-            // Mime Magic version is likely to be less precise
-            if (typeFromMagic != null) {
-                assertEquals(
-                        MediaType.parse(typeFromMagic),
-                        mimeTypes.detect(stream, m));
-            }
-
-            // All being well, the detector should get it perfect
-            assertEquals(
-                    MediaType.parse(typeFromDetector),
-                    detector.detect(stream, m));
-        }
-    }
-
-    @Test
-    public void testDetectOLE2() throws Exception {
-        // Microsoft office types known by POI
-        assertTypeByData("testEXCEL.xls", "application/vnd.ms-excel");
-        assertTypeByData("testWORD.doc", "application/msword");
-        assertTypeByData("testPPT.ppt", "application/vnd.ms-powerpoint");
-        
-        assertTypeByData("test-outlook.msg", "application/vnd.ms-outlook");
-        assertTypeByData("test-outlook2003.msg", "application/vnd.ms-outlook");
-        assertTypeByData("testVISIO.vsd", "application/vnd.visio");
-        assertTypeByData("testPUBLISHER.pub", "application/x-mspublisher");
-        assertTypeByData("testWORKS.wps", "application/vnd.ms-works");
-        assertTypeByData("testWORKS2000.wps", "application/vnd.ms-works");
-        
-        // older Works Word Processor files can't be recognized
-        // they were created with Works Word Processor 7.0 (hence the text inside)
-        // and exported to the older formats with the "Save As" feature
-        assertTypeByData("testWORKSWordProcessor3.0.wps","application/vnd.ms-works");
-        assertTypeByData("testWORKSWordProcessor4.0.wps","application/vnd.ms-works");
-        assertTypeByData("testWORKSSpreadsheet7.0.xlr", "application/x-tika-msworks-spreadsheet");
-        assertTypeByData("testPROJECT2003.mpp", "application/vnd.ms-project");
-        assertTypeByData("testPROJECT2007.mpp", "application/vnd.ms-project");
-        
-        // Excel95 can be detected by not parsed
-        assertTypeByData("testEXCEL_95.xls", "application/vnd.ms-excel");
-
-        // Try some ones that POI doesn't handle, that are still OLE2 based
-        assertTypeByData("testCOREL.shw", "application/x-corelpresentations");
-        assertTypeByData("testQUATTRO.qpw", "application/x-quattro-pro");
-        assertTypeByData("testQUATTRO.wb3", "application/x-quattro-pro");
-        
-        assertTypeByData("testHWP_5.0.hwp", "application/x-hwp-v5");
-        
-        
-        // With the filename and data
-        assertTypeByNameAndData("testEXCEL.xls", "application/vnd.ms-excel");
-        assertTypeByNameAndData("testWORD.doc", "application/msword");
-        assertTypeByNameAndData("testPPT.ppt", "application/vnd.ms-powerpoint");
-        
-        // With the wrong filename supplied, data will trump filename
-        assertTypeByNameAndData("testEXCEL.xls", "notWord.doc",  "application/vnd.ms-excel");
-        assertTypeByNameAndData("testWORD.doc",  "notExcel.xls", "application/msword");
-        assertTypeByNameAndData("testPPT.ppt",   "notWord.doc",  "application/vnd.ms-powerpoint");
-        
-        // With a filename of a totally different type, data will trump filename
-        assertTypeByNameAndData("testEXCEL.xls", "notPDF.pdf",  "application/vnd.ms-excel");
-        assertTypeByNameAndData("testEXCEL.xls", "notPNG.png",  "application/vnd.ms-excel");
-    }
-    
-    /**
-     * There is no way to distinguish "proper" StarOffice files from templates.
-     * All templates have the same extension but their actual type depends on
-     * the magic. Our current MimeTypes class doesn't allow us to use the same
-     * glob pattern in more than one mimetype.
-     * 
-     * @throws Exception
-     */
-    @Test
-    public void testDetectStarOfficeFiles() throws Exception {
-        assertType("testStarOffice-5.2-calc.sdc",
-                "application/vnd.stardivision.calc",
-                "application/vnd.stardivision.calc");
-        assertType("testVORCalcTemplate.vor",
-                "application/vnd.stardivision.calc",
-                "application/vnd.stardivision.calc");
-        assertType("testStarOffice-5.2-draw.sda",
-                "application/vnd.stardivision.draw",
-                "application/vnd.stardivision.draw");
-        assertType("testVORDrawTemplate.vor",
-                "application/vnd.stardivision.draw",
-                "application/vnd.stardivision.draw");
-        assertType("testStarOffice-5.2-impress.sdd",
-                "application/vnd.stardivision.impress",
-                "application/vnd.stardivision.impress");
-        assertType("testVORImpressTemplate.vor",
-                "application/vnd.stardivision.impress",
-                "application/vnd.stardivision.impress");
-        assertType("testStarOffice-5.2-writer.sdw",
-                "application/vnd.stardivision.writer",
-                "application/vnd.stardivision.writer");
-        assertType("testVORWriterTemplate.vor",
-                "application/vnd.stardivision.writer",
-                "application/vnd.stardivision.writer");
-
-    }
-
-    @Test
-    public void testOpenContainer() throws Exception {
-        try (TikaInputStream stream = TikaInputStream.get(
-                TestContainerAwareDetector.class.getResource("/test-documents/testPPT.ppt"))) {
-            assertNull(stream.getOpenContainer());
-            assertEquals(
-                    MediaType.parse("application/vnd.ms-powerpoint"),
-                    detector.detect(stream, new Metadata()));
-            assertTrue(stream.getOpenContainer() instanceof NPOIFSFileSystem);
-        }
-    }
-
-    /**
-     * EPub uses a similar mimetype entry to OpenDocument for storing
-     *  the mimetype within the parent zip file
-     */
-    @Test
-    public void testDetectEPub() throws Exception {
-       assertTypeByData("testEPUB.epub", "application/epub+zip");
-       assertTypeByData("testiBooks.ibooks", "application/x-ibooks+zip");
-    }
-    
-    @Test
-    public void testDetectLotusNotesEml() throws Exception {
-        // Lotus .eml files aren't guaranteed to have any of the magic 
-        // matches as the first line, but should have X-Notes-Item and Message-ID
-        assertTypeByData("testLotusEml.eml", "message/rfc822");
-     }
-
-    @Test
-    public void testDetectODF() throws Exception {
-        assertTypeByData("testODFwithOOo3.odt", "application/vnd.oasis.opendocument.text");
-        assertTypeByData("testOpenOffice2.odf", "application/vnd.oasis.opendocument.formula");
-    }
-
-    @Test
-    public void testDetectOOXML() throws Exception {
-        assertTypeByData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-        assertTypeByData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-        assertTypeByData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-
-        // Check some of the less common OOXML types
-        assertTypeByData("testPPT.pptm", "application/vnd.ms-powerpoint.presentation.macroenabled.12");
-        assertTypeByData("testPPT.ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow");
-        assertTypeByData("testPPT.ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12");
-        assertTypeByData("testDOTM.dotm", "application/vnd.ms-word.template.macroEnabled.12");
-        assertTypeByData("testEXCEL.strict.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-        assertTypeByData("testPPT.xps", "application/vnd.ms-xpsdocument");
-
-        assertTypeByData("testVISIO.vsdm", "application/vnd.ms-visio.drawing.macroenabled.12");
-        assertTypeByData("testVISIO.vsdx", "application/vnd.ms-visio.drawing");
-        assertTypeByData("testVISIO.vssm", "application/vnd.ms-visio.stencil.macroenabled.12");
-        assertTypeByData("testVISIO.vssx", "application/vnd.ms-visio.stencil");
-        assertTypeByData("testVISIO.vstm", "application/vnd.ms-visio.template.macroenabled.12");
-        assertTypeByData("testVISIO.vstx", "application/vnd.ms-visio.template");
-        
-        // .xlsb is an OOXML file containing the binary parts, and not
-        //  an OLE2 file as you might initially expect!
-        assertTypeByData("testEXCEL.xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12");
-
-        // With the filename and data
-        assertTypeByNameAndData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-        assertTypeByNameAndData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-        assertTypeByNameAndData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-        
-        // With the wrong filename supplied, data will trump filename
-        assertTypeByNameAndData("testEXCEL.xlsx", "notWord.docx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-        assertTypeByNameAndData("testWORD.docx",  "notExcel.xlsx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-        assertTypeByNameAndData("testPPT.pptx",   "notWord.docx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-        
-        // With an incorrect filename of a different container type, data trumps filename
-        assertTypeByNameAndData("testEXCEL.xlsx", "notOldExcel.xls", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-    }
-    
-    /**
-     * Password Protected OLE2 files are fairly straightforward to detect, as they
-     *  have the same structure as regular OLE2 files. (Core streams may be encrypted
-     *  however)
-     */
-    @Test
-    public void testDetectProtectedOLE2() throws Exception {
-        assertTypeByData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
-        assertTypeByData("testWORD_protected_passtika.doc", "application/msword");
-        assertTypeByData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
-        assertTypeByNameAndData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
-        assertTypeByNameAndData("testWORD_protected_passtika.doc", "application/msword");
-        assertTypeByNameAndData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
-    }
-
-    /**
-     * Password Protected OOXML files are much more tricky beasts to work with.
-     * They have a very different structure to regular OOXML files, and instead
-     *  of being ZIP based they are actually an OLE2 file which contains the
-     *  OOXML structure within an encrypted stream.
-     * This makes detecting them much harder...
-     */
-    @Test
-    public void testDetectProtectedOOXML() throws Exception {
-        // Encrypted Microsoft Office OOXML files have OLE magic but
-        //  special streams, so we can tell they're Protected OOXML
-        assertTypeByData("testEXCEL_protected_passtika.xlsx", 
-                "application/x-tika-ooxml-protected");
-        assertTypeByData("testWORD_protected_passtika.docx", 
-                "application/x-tika-ooxml-protected");
-        assertTypeByData("testPPT_protected_passtika.pptx", 
-                "application/x-tika-ooxml-protected");
-        
-        // At the moment, we can't use the name to specialise
-        // See discussions on TIKA-790 for details
-        assertTypeByNameAndData("testEXCEL_protected_passtika.xlsx", 
-                "application/x-tika-ooxml-protected");
-        assertTypeByNameAndData("testWORD_protected_passtika.docx", 
-                "application/x-tika-ooxml-protected");
-        assertTypeByNameAndData("testPPT_protected_passtika.pptx", 
-                "application/x-tika-ooxml-protected");
-    }
-
-    /**
-     * Check that temporary files created by Tika are removed after
-     * closing TikaInputStream.
-     */
-    @Test
-    public void testRemovalTempfiles() throws Exception {
-        assertRemovalTempfiles("testWORD.docx");
-        assertRemovalTempfiles("test-documents.zip");
-    }
-
-    private int countTemporaryFiles() {
-        return new File(System.getProperty("java.io.tmpdir")).listFiles(
-                new FilenameFilter() {
-                    public boolean accept(File dir, String name) {
-                        return name.startsWith("apache-tika-");
-                    }
-                }).length;
-    }
-
-    private void assertRemovalTempfiles(String fileName) throws Exception {
-        int numberOfTempFiles = countTemporaryFiles();
-
-        try (TikaInputStream stream = TikaInputStream.get(
-                TestContainerAwareDetector.class.getResource("/test-documents/" + fileName))) {
-            detector.detect(stream, new Metadata());
-        }
-
-        assertEquals(numberOfTempFiles, countTemporaryFiles());
-    }
-
-    @Test
-    public void testDetectIWork() throws Exception {
-        assertTypeByData("testKeynote.key", "application/vnd.apple.keynote");
-        assertTypeByData("testNumbers.numbers", "application/vnd.apple.numbers");
-        assertTypeByData("testPages.pages", "application/vnd.apple.pages");
-    }
-
-    @Test
-    public void testDetectKMZ() throws Exception {
-       assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
-    }
-    
-    @Test
-    public void testDetectIPA() throws Exception {
-        assertTypeByNameAndData("testIPA.ipa", "application/x-itunes-ipa");
-        assertTypeByData("testIPA.ipa", "application/x-itunes-ipa");
-    }
-    
-    @Test
-    public void testASiC() throws Exception {
-        assertTypeByData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
-        assertTypeByData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
-        assertTypeByNameAndData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
-        assertTypeByNameAndData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
-    }
-     
-    @Test
-    public void testDetectZip() throws Exception {
-        assertTypeByData("test-documents.zip", "application/zip");
-        assertTypeByData("test-zip-of-zip.zip", "application/zip");
-        
-        // JAR based formats
-        assertTypeByData("testJAR.jar", "application/java-archive");
-        assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
-        assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
-        assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
-        
-        // JAR with HTML files in it
-        assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
-                                "application/java-archive", "application/java-archive");
-    }
-
-    private TikaInputStream getTruncatedFile(String name, int n)
-            throws IOException {
-        try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(
-                "/test-documents/" + name)) {
-            byte[] bytes = new byte[n];
-            int m = 0;
-            while (m < bytes.length) {
-                int i = input.read(bytes, m, bytes.length - m);
-                if (i != -1) {
-                    m += i;
-                } else {
-                    throw new IOException("Unexpected end of stream");
-                }
-            }
-            return TikaInputStream.get(bytes);
-        }
-    }
-
-    @Test
-    public void testTruncatedFiles() throws Exception {
-        // First up a truncated OOXML (zip) file
-       
-        // With only the data supplied, the best we can do is the container
-        Metadata m = new Metadata();
-        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
-            assertEquals(
-                    MediaType.application("x-tika-ooxml"),
-                    detector.detect(xlsx, m));
-        }
-        
-        // With truncated data + filename, we can use the filename to specialise
-        m = new Metadata();
-        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
-        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
-            assertEquals(
-                    MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
-                    detector.detect(xlsx, m));
-        }
-
-        // Now a truncated OLE2 file 
-        m = new Metadata();
-        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
-            assertEquals(
-                    MediaType.application("x-tika-msoffice"),
-                    detector.detect(xls, m));
-        }
-        
-        // Finally a truncated OLE2 file, with a filename available
-        m = new Metadata();
-        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
-        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
-            assertEquals(
-                    MediaType.application("vnd.ms-excel"),
-                    detector.detect(xls, m));
-        }
-   }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java b/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
deleted file mode 100644
index e988aff..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.embedder;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryResources;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.txt.TXTParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Unit test for {@link ExternalEmbedder}s.
- */
-public class ExternalEmbedderTest {
-
-    protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
-            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
-    protected static final String DEFAULT_CHARSET = UTF_8.name();
-    private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION = "dc:description";
-    private static final String TEST_TXT_PATH = "/test-documents/testTXT.txt";
-
-    private TemporaryResources tmp = new TemporaryResources();
-
-    /**
-     * Gets the expected returned metadata value for the given field
-     *
-     * @param fieldName
-     * @return a prefix added to the field name
-     */
-    protected String getExpectedMetadataValueString(String fieldName, Date timestamp) {
-        return this.getClass().getSimpleName() + " embedded " + fieldName +
-                " on " + EXPECTED_METADATA_DATE_FORMATTER.format(timestamp);
-    }
-
-    /**
-     * Gets the tika <code>Metadata</code> object containing data to be
-     * embedded.
-     *
-     * @return the populated tika metadata object
-     */
-    protected Metadata getMetadataToEmbed(Date timestamp) {
-        Metadata metadata = new Metadata();
-        metadata.add(TikaCoreProperties.DESCRIPTION,
-                getExpectedMetadataValueString(TikaCoreProperties.DESCRIPTION.toString(), timestamp));
-        return metadata;
-    }
-
-    /**
-     * Gets the <code>Embedder</code> to test.
-     *
-     * @return the embedder under test
-     */
-    protected Embedder getEmbedder() {
-        ExternalEmbedder embedder = new ExternalEmbedder();
-        Map<Property, String[]> metadataCommandArguments = new HashMap<Property, String[]>(1);
-        metadataCommandArguments.put(TikaCoreProperties.DESCRIPTION,
-                new String[] { COMMAND_METADATA_ARGUMENT_DESCRIPTION });
-        embedder.setMetadataCommandArguments(metadataCommandArguments);
-        return embedder;
-    }
-
-    /**
-     * Gets the source input stream through standard Java resource loaders 
-     * before metadata has been embedded.
-     *
-     * @return a fresh input stream
-     */
-    protected InputStream getSourceStandardInputStream() {
-        return this.getClass().getResourceAsStream(TEST_TXT_PATH);
-    }
-
-    /**
-     * Gets the source input stream via {@link TikaInputStream}
-     * before metadata has been embedded.
-     *
-     * @return a fresh input stream
-     * @throws FileNotFoundException 
-     */
-    protected InputStream getSourceTikaInputStream() throws FileNotFoundException {
-        return TikaInputStream.get(getSourceInputFile());
-    }
-    
-    /**
-     * Gets the source input file through standard Java resource loaders
-     * before metadata has been embedded.
-     *
-     * @return a fresh input stream
-     * @throws FileNotFoundException 
-     */
-    protected File getSourceInputFile() throws FileNotFoundException {
-        URL origUrl = this.getClass().getResource(TEST_TXT_PATH);
-        if (origUrl == null) {
-            throw new FileNotFoundException("could not load " + TEST_TXT_PATH);
-        }
-        try {
-            return new File(origUrl.toURI());
-        } catch (URISyntaxException e) {
-            throw new FileNotFoundException(e.getMessage());
-        }
-    }
-
-    /**
-     * Gets the parser to use to verify the result of the embed operation.
-     *
-     * @return the parser to read embedded metadata
-     */
-    protected Parser getParser() {
-        return new TXTParser();
-    }
-
-    /**
-     * Whether or not the final result of reading the now embedded metadata is
-     * expected in the output of the external tool
-     *
-     * @return whether or not results are expected in command line output
-     */
-    protected boolean getIsMetadataExpectedInOutput() {
-        return true;
-    }
-
-    /**
-     * Tests embedding metadata then reading metadata to verify the results.
-     *
-     * @param isResultExpectedInOutput whether or not results are expected in command line output
-     */
-    protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
-        Embedder embedder = getEmbedder();
-        
-        // TODO Move this check to ExternalEmbedder
-        String os = System.getProperty("os.name", "");
-        if (os.contains("Windows")) {
-            // Skip test on Windows
-            return;
-        }
-        
-        Date timestamp = new Date();
-        Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
-
-        try {
-            File tempOutputFile = tmp.createTemporaryFile();
-            FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile);
-
-            // Embed the metadata into a copy of the original output stream
-            embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
-
-            ParseContext context = new ParseContext();
-            Parser parser = getParser();
-            context.set(Parser.class, parser);
-
-            // Setup the extracting content handler
-            ByteArrayOutputStream result = new ByteArrayOutputStream();
-            OutputStreamWriter outputWriter = new OutputStreamWriter(result,DEFAULT_CHARSET);
-            ContentHandler handler = new BodyContentHandler(outputWriter);
-
-            // Create a new metadata object to read the new metadata into
-            Metadata embeddedMetadata = new Metadata();
-
-            // Setup a re-read of the now embeded temp file
-            FileInputStream embeddedFileInputStream = new FileInputStream(tempOutputFile);
-
-            parser.parse(embeddedFileInputStream, handler, embeddedMetadata,
-                    context);
-
-            tmp.dispose();
-
-            String outputString = null;
-            if (isResultExpectedInOutput) {
-                outputString = result.toString(DEFAULT_CHARSET);
-            } else {
-                assertTrue("no metadata found", embeddedMetadata.size() > 0);
-            }
-
-            // Check each metadata property for the expected value
-            for (String metadataName : metadataToEmbed.names()) {
-                if (metadataToEmbed.get(metadataName) != null) {
-                    String expectedValue = metadataToEmbed.get(metadataName);
-                    boolean foundExpectedValue = false;
-                    if (isResultExpectedInOutput) {
-                        // just check that the entire output contains the expected string
-                        foundExpectedValue = outputString.contains(expectedValue);
-                    } else {
-                        if (embeddedMetadata.isMultiValued(metadataName)) {
-                            for (String embeddedValue : embeddedMetadata.getValues(metadataName)) {
-                                if (embeddedValue != null) {
-                                    if (embeddedValue.contains(expectedValue)) {
-                                        foundExpectedValue = true;
-                                        break;
-                                    }
-                                }
-                            }
-                        } else {
-                            String embeddedValue = embeddedMetadata.get(metadataName);
-                            assertNotNull("expected metadata for "
-                                    + metadataName + " not found",
-                                    embeddedValue);
-                            foundExpectedValue = embeddedValue.contains(expectedValue);
-                        }
-                    }
-                    assertTrue(
-                            "result did not contain expected appended metadata "
-                                    + metadataName + "="
-                                    + expectedValue,
-                            foundExpectedValue);
-                }
-            }
-        } catch (IOException e) {
-            fail(e.getMessage());
-        } catch (TikaException e) {
-            fail(e.getMessage());
-        } catch (SAXException e) {
-            fail(e.getMessage());
-        }
-    }
-    
-    protected void checkSourceFileExists() {
-        String message = "the original input file was deleted";
-        try {
-            File origInputFile = getSourceInputFile();
-            assertNotNull(message, origInputFile);
-            assertTrue(message, origInputFile.exists());
-        } catch (FileNotFoundException e) {
-            fail(message + ": " + e.getMessage());
-        }
-    }
-
-    /**
-     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
-     * 
-     * @throws IOException
-     */
-    @Test
-    public void testEmbedStandardInputStream() throws IOException {
-        embedInTempFile(getSourceStandardInputStream(), getIsMetadataExpectedInOutput());
-        checkSourceFileExists();
-    }
-    
-    /**
-     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
-     * 
-     * @throws IOException
-     */
-    @Test
-    public void testEmbedTikaInputStream() throws IOException {
-        embedInTempFile(getSourceTikaInputStream(), getIsMetadataExpectedInOutput());
-        checkSourceFileExists();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java b/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
deleted file mode 100644
index 7987630..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.mime;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.junit.Before;
-import org.junit.Test;
-
-public class MimeTypeTest {
-
-    private MimeTypes types;
-    private MimeType text;
-
-    @Before
-    public void setUp() throws MimeTypeException {
-        types = new MimeTypes();
-        text = types.forName("text/plain");
-    }
-
-    /** Test MimeType constructor */
-    @Test
-    public void testConstrctor() {
-        // Missing name
-        try {
-            new MimeType(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-    }
-
-    @Test
-    public void testIsValidName() {
-        assertTrue(MimeType.isValid("application/octet-stream"));
-        assertTrue(MimeType.isValid("text/plain"));
-        assertTrue(MimeType.isValid("foo/bar"));
-        assertTrue(MimeType.isValid("a/b"));
-
-        assertFalse(MimeType.isValid("application"));
-        assertFalse(MimeType.isValid("application/"));
-        assertFalse(MimeType.isValid("/"));
-        assertFalse(MimeType.isValid("/octet-stream"));
-        assertFalse(MimeType.isValid("application//octet-stream"));
-        assertFalse(MimeType.isValid("application/octet=stream"));
-        assertFalse(MimeType.isValid("application/\u00f6ctet-stream"));
-        assertFalse(MimeType.isValid("text/plain;"));
-        assertFalse(MimeType.isValid("text/plain; charset=UTF-8"));
-        try {
-            MimeType.isValid(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-    }
-
-    /** Test MimeType setDescription() */
-    @Test
-    public void testSetEmptyValues() {
-        try {
-            text.setDescription(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-        
-        try {
-            text.setAcronym(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-        
-        try {
-            text.addLink(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-
-        try {
-            text.setUniformTypeIdentifier(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java b/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
deleted file mode 100644
index be8a575..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.mime;
-
-import static org.apache.tika.mime.MediaType.OCTET_STREAM;
-import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.junit.Before;
-import org.junit.Test;
-
-public class MimeTypesTest {
-
-    private MimeTypes types;
-
-    private MediaTypeRegistry registry;
-
-    private MimeType binary;
-
-    private MimeType text;
-
-    private MimeType html;
-
-    @Before
-    public void setUp() throws MimeTypeException {
-        types = new MimeTypes();
-        registry = types.getMediaTypeRegistry();
-        binary = types.forName("application/octet-stream");
-        text = types.forName("text/plain");
-        types.addAlias(text, MediaType.parse("text/x-plain"));
-        html = types.forName("text/html");
-        types.setSuperType(html, TEXT_PLAIN);
-    }
-
-    @Test
-    public void testForName() throws MimeTypeException {
-        assertEquals(text, types.forName("text/plain"));
-        assertEquals(text, types.forName("TEXT/PLAIN"));
-
-        try {
-            types.forName("invalid");
-            fail("MimeTypeException not thrown on invalid type name");
-        } catch (MimeTypeException e) {
-            // expected
-        }
-    }
-
-    @Test
-    public void testRegisteredMimes() throws MimeTypeException {
-        String dummy = "text/xxxxx";
-        assertEquals(text, types.getRegisteredMimeType("text/plain"));
-        assertNull(types.getRegisteredMimeType(dummy));
-        assertNotNull(types.forName(dummy));
-        assertEquals(dummy, types.forName("text/xxxxx").getType().toString());
-        assertEquals(dummy, types.getRegisteredMimeType("text/xxxxx").getType().toString());
-        
-        try {
-            types.forName("invalid");
-            fail("MimeTypeException not thrown on invalid type name");
-        } catch (MimeTypeException e) {
-            // expected
-        }
-    }
-
-    @Test
-    public void testSuperType() throws MimeTypeException {
-        assertNull(registry.getSupertype(OCTET_STREAM));
-        assertEquals(OCTET_STREAM, registry.getSupertype(TEXT_PLAIN));
-        assertEquals(TEXT_PLAIN, registry.getSupertype(html.getType()));
-   }
-
-    @Test
-    public void testIsDescendantOf() {
-        assertFalse(registry.isSpecializationOf(OCTET_STREAM, OCTET_STREAM));
-        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, TEXT_PLAIN));
-        assertFalse(registry.isSpecializationOf(html.getType(), html.getType()));
-
-        assertTrue(registry.isSpecializationOf(html.getType(), OCTET_STREAM));
-        assertFalse(registry.isSpecializationOf(OCTET_STREAM, html.getType()));
-
-        assertTrue(registry.isSpecializationOf(html.getType(), TEXT_PLAIN));
-        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, html.getType()));
-
-        assertTrue(registry.isSpecializationOf(TEXT_PLAIN, OCTET_STREAM));
-        assertFalse(registry.isSpecializationOf(OCTET_STREAM, TEXT_PLAIN));
-    }
-
-    @Test
-    public void testCompareTo() {
-        assertTrue(binary.compareTo(binary) == 0);
-        assertTrue(binary.compareTo(text) != 0);
-        assertTrue(binary.compareTo(html) != 0);
-
-        assertTrue(text.compareTo(binary) != 0);
-        assertTrue(text.compareTo(text) == 0);
-        assertTrue(text.compareTo(html) != 0);
-
-        assertTrue(html.compareTo(binary) != 0);
-        assertTrue(html.compareTo(text) != 0);
-        assertTrue(html.compareTo(html) == 0);
-    }
-
-}


[07/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index a8bfaed..20f8760 100644
--- a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -60,6 +60,7 @@ import org.apache.tika.sax.ContentHandlerDecorator;
 import org.apache.tika.sax.ToXMLContentHandler;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
@@ -105,74 +106,55 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testPdfParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        Metadata metadata = new Metadata();
-
-        InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF.pdf");
-
-        String content = getText(stream, parser, metadata);
-
-        assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Bertrand Delacr\u00e9taz", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Bertrand Delacr\u00e9taz", metadata.get(Metadata.AUTHOR));
-        assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL));
-        assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE));
+        XMLResult r = getXML("testPDF.pdf");
+        assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(Metadata.AUTHOR));
+        assertEquals("Firefox", r.metadata.get(TikaCoreProperties.CREATOR_TOOL));
+        assertEquals("Apache Tika - Apache Tika", r.metadata.get(TikaCoreProperties.TITLE));
 
         // Can't reliably test dates yet - see TIKA-451
 //        assertEquals("Sat Sep 15 10:02:31 BST 2007", metadata.get(Metadata.CREATION_DATE));
 //        assertEquals("Sat Sep 15 10:02:31 BST 2007", metadata.get(Metadata.LAST_MODIFIED));
 
-        assertContains("Apache Tika", content);
-        assertContains("Tika - Content Analysis Toolkit", content);
-        assertContains("incubator", content);
-        assertContains("Apache Software Foundation", content);
+        assertContains("Apache Tika", r.xml);
+        assertContains("Tika - Content Analysis Toolkit", r.xml);
+        assertContains("incubator", r.xml);
+        assertContains("Apache Software Foundation", r.xml);
         // testing how the end of one paragraph is separated from start of the next one
-        assertTrue("should have word boundary after headline",
-                !content.contains("ToolkitApache"));
-        assertTrue("should have word boundary between paragraphs",
-                !content.contains("libraries.Apache"));
+
+        // should have word boundary after headline
+        assertNotContained("ToolkitApache", r.xml);
+        // should have word boundary between paragraphs
+        assertNotContained("libraries.Apache", r.xml);
     }
 
     @Test
     public void testPdfParsingMetadataOnly() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF.pdf")) {
-            parser.parse(stream, null, metadata, new ParseContext());
-        }
-
-        assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Bertrand Delacr\u00e9taz", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL));
-        assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE));
+        XMLResult r = getXML("testPDF.pdf");
+        assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Firefox", r.metadata.get(TikaCoreProperties.CREATOR_TOOL));
+        assertEquals("Apache Tika - Apache Tika", r.metadata.get(TikaCoreProperties.TITLE));
     }
 
     @Test
     public void testCustomMetadata() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        Metadata metadata = new Metadata();
-
-        InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF-custommetadata.pdf");
-
-        String content = getText(stream, parser, metadata);
+        XMLResult r = getXML("testPDF-custommetadata.pdf");
 
-        assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Document author", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Document author", metadata.get(Metadata.AUTHOR));
-        assertEquals("Document title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Document author", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Document author", r.metadata.get(Metadata.AUTHOR));
+        assertEquals("Document title", r.metadata.get(TikaCoreProperties.TITLE));
 
-        assertEquals("Custom Value", metadata.get("Custom Property"));
+        assertEquals("Custom Value", r.metadata.get("Custom Property"));
 
-        assertEquals("Array Entry 1", metadata.get("Custom Array"));
-        assertEquals(2, metadata.getValues("Custom Array").length);
-        assertEquals("Array Entry 1", metadata.getValues("Custom Array")[0]);
-        assertEquals("Array Entry 2", metadata.getValues("Custom Array")[1]);
+        assertEquals("Array Entry 1", r.metadata.get("Custom Array"));
+        assertEquals(2, r.metadata.getValues("Custom Array").length);
+        assertEquals("Array Entry 1", r.metadata.getValues("Custom Array")[0]);
+        assertEquals("Array Entry 2", r.metadata.getValues("Custom Array")[1]);
 
-        assertContains("Hello World!", content);
+        assertContains("Hello World!", r.xml);
     }
 
     /**
@@ -182,16 +164,9 @@ public class PDFParserTest extends TikaTest {
      */
     @Test
     public void testProtectedPDF() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-        ParseContext context = new ParseContext();
-
-        try (InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF_protected.pdf")) {
-            parser.parse(stream, handler, metadata, context);
-        }
 
+        XMLResult r = getXML("testPDF_protected.pdf");
+        Metadata metadata = r.metadata;
         assertEquals("true", metadata.get("pdf:encrypted"));
         assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
@@ -200,27 +175,23 @@ public class PDFParserTest extends TikaTest {
         assertEquals("Speeches by Andrew G Haldane", metadata.get(Metadata.SUBJECT));
         assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", metadata.get(TikaCoreProperties.TITLE));
 
-        String content = handler.toString();
+        String content = r.xml;
         assertContains("RETHINKING THE FINANCIAL NETWORK", content);
         assertContains("On 16 November 2002", content);
         assertContains("In many important respects", content);
 
 
         // Try again with an explicit empty password
-        handler = new BodyContentHandler();
         metadata = new Metadata();
 
-        context = new ParseContext();
+        ParseContext context = new ParseContext();
         context.set(PasswordProvider.class, new PasswordProvider() {
             public String getPassword(Metadata metadata) {
                 return "";
             }
         });
-
-        try (InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF_protected.pdf")) {
-            parser.parse(stream, handler, metadata, context);
-        }
+        r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context);
+        metadata = r.metadata;
         assertEquals("true", metadata.get("pdf:encrypted"));
 
         assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
@@ -234,7 +205,6 @@ public class PDFParserTest extends TikaTest {
         assertContains("In many important respects", content);
 
         //now test wrong password
-        handler = new BodyContentHandler();
         metadata = new Metadata();
         context = new ParseContext();
         context.set(PasswordProvider.class, new PasswordProvider() {
@@ -244,23 +214,20 @@ public class PDFParserTest extends TikaTest {
         });
 
         boolean ex = false;
-        try (InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF_protected.pdf")) {
-            parser.parse(stream, handler, metadata, context);
+        try {
+            r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context);
         } catch (EncryptedDocumentException e) {
             ex = true;
         }
-        content = handler.toString();
+        content = r.xml;
 
         assertTrue("encryption exception", ex);
         assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("true", metadata.get("pdf:encrypted"));
         //pdf:encrypted, X-Parsed-By and Content-Type
         assertEquals("very little metadata should be parsed", 3, metadata.names().length);
-        assertEquals(0, content.length());
 
         //now test wrong password with non sequential parser
-        handler = new BodyContentHandler();
         metadata = new Metadata();
         context = new ParseContext();
         context.set(PasswordProvider.class, new PasswordProvider() {
@@ -272,22 +239,21 @@ public class PDFParserTest extends TikaTest {
         config.setUseNonSequentialParser(true);
         context.set(PDFParserConfig.class, config);
 
-        ;
         ex = false;
-        try (InputStream stream = PDFParserTest.class.getResourceAsStream(
-                "/test-documents/testPDF_protected.pdf")) {
-            parser.parse(stream, handler, metadata, context);
+        try {
+            r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context);
         } catch (EncryptedDocumentException e) {
             ex = true;
         }
-        content = handler.toString();
+
+        content = r.xml;
         assertTrue("encryption exception", ex);
         assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("true", metadata.get("pdf:encrypted"));
 
         //pdf:encrypted, X-Parsed-By and Content-Type
         assertEquals("very little metadata should be parsed", 3, metadata.names().length);
-        assertEquals(0, content.length());
+
     }
 
     @Test
@@ -619,6 +585,7 @@ public class PDFParserTest extends TikaTest {
      * TODO: more testing
      */
     @Test
+    @Ignore("this will be going away as soon as we upgrade to 2.0")
     public void testSequentialParser() throws Exception {
 
         Parser sequentialParser = new AutoDetectParser();
@@ -745,13 +712,13 @@ public class PDFParserTest extends TikaTest {
         //The current test doc does not contain any content in the signature area.
         //This just tests that a RuntimeException is not thrown.
         //TODO: find a better test file for this issue.
-        String xml = getXML("/testPDF_acroform3.pdf").xml;
+        String xml = getXML("testPDF_acroform3.pdf").xml;
         assertTrue("found", (xml.contains("<li>aTextField: TIKA-1226</li>")));
     }
 
     @Test // TIKA-1228, TIKA-1268
     public void testEmbeddedFilesInChildren() throws Exception {
-        String xml = getXML("/testPDF_childAttachments.pdf").xml;
+        String xml = getXML("testPDF_childAttachments.pdf").xml;
         //"regressiveness" exists only in Unit10.doc not in the container pdf document
         assertTrue(xml.contains("regressiveness"));
 
@@ -785,7 +752,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testEmbeddedFilesInAnnotations() throws Exception {
-        String xml = getXML("/testPDFFileEmbInAnnotation.pdf").xml;
+        String xml = getXML("testPDFFileEmbInAnnotation.pdf").xml;
 
         assertTrue(xml.contains("This is a Excel"));
     }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
index 4398999..aa70106 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
@@ -114,7 +114,7 @@ public class ISArchiveParser implements Parser {
 		InputStream stream = TikaInputStream.get(new File(this.location + investigation));
 		
 		ISATabUtils.parseInvestigation(stream, xhtml, metadata, context, this.studyFileName);
-		
+		stream.close();
 		xhtml.element("h1", "INVESTIGATION " + metadata.get("Investigation Identifier"));
 	}
 
@@ -130,6 +130,7 @@ public class ISArchiveParser implements Parser {
 			xhtml.element("h3", "ASSAY " + assayFileName);
 			InputStream stream = TikaInputStream.get(new File(this.location + assayFileName));
 			ISATabUtils.parseAssay(stream, xhtml, metadata, context);
+			stream.close();
 			xhtml.endElement("div");
 		}
 	}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
index 6a63eb4..ddbca81 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
@@ -21,11 +21,10 @@ package org.apache.tika.parser.netcdf;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collections;
-import java.util.Set;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
@@ -37,11 +36,10 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
-
 import ucar.nc2.Attribute;
+import ucar.nc2.Dimension;
 import ucar.nc2.NetcdfFile;
 import ucar.nc2.Variable;
-import ucar.nc2.Dimension;
 
 /**
  * A {@link Parser} for <a
@@ -82,9 +80,10 @@ public class NetCDFParser extends AbstractParser {
                       Metadata metadata, ParseContext context) throws IOException,
             SAXException, TikaException {
 
-        TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources());
+        TikaInputStream tis = TikaInputStream.get(stream);
+        NetcdfFile ncFile = null;
         try {
-            NetcdfFile ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
+            ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
             metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
             // first parse out the set of global attributes
             for (Attribute attr : ncFile.getGlobalAttributes()) {
@@ -129,9 +128,13 @@ public class NetCDFParser extends AbstractParser {
             xhtml.endElement("ul");
 
             xhtml.endDocument();
-
+            ncFile.close();
         } catch (IOException e) {
             throw new TikaException("NetCDF parse error", e);
+        } finally {
+            if (ncFile != null) {
+                ncFile.close();
+            }
         }
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
index ef31abc..373da0d 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
@@ -18,37 +18,22 @@ package org.apache.tika.parser.dif;
 
 import static org.junit.Assert.assertEquals;
 
-import java.io.InputStream;
-
 import org.apache.tika.TikaTest;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
 public class DIFParserTest extends TikaTest {
 
 	@Test
 	public void testDifMetadata() throws Exception {
-		Parser parser = new DIFParser();
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = DIFParser.class.getResourceAsStream(
-                "/test-documents/Zamora2010.dif")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals(metadata.get("DIF-Entry_ID"),"00794186-48f9-11e3-9dcb-00c0f03d5b7c");
-        assertEquals(metadata.get("DIF-Metadata_Name"),"ACADIS IDN DIF");        
+        XMLResult r = getXML("Zamora2010.dif", new DIFParser());
+        assertEquals(r.metadata.get("DIF-Entry_ID"),"00794186-48f9-11e3-9dcb-00c0f03d5b7c");
+        assertEquals(r.metadata.get("DIF-Metadata_Name"),"ACADIS IDN DIF");
 
-        String content = handler.toString();
+        String content = r.xml;
         assertContains("Title: Zamora 2010 Using Sediment Geochemistry", content);
-        assertContains("Southernmost_Latitude : 78.833", content);
-        assertContains("Northernmost_Latitude : 79.016", content);
-        assertContains("Westernmost_Longitude : 11.64", content);
-        assertContains("Easternmost_Longitude : 13.34", content);
+        assertContains("Southernmost_Latitude : </td><td>78.833", content);
+        assertContains("Northernmost_Latitude : </td><td>79.016", content);
+        assertContains("Westernmost_Longitude : </td><td>11.64", content);
+        assertContains("Easternmost_Longitude : </td><td>13.34", content);
 	}
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
index 3603280..0bf67fb 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
@@ -17,44 +17,26 @@
 
 package org.apache.tika.parser.envi;
 
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertNotNull;
-
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.ToXMLContentHandler;
+import org.apache.tika.TikaTest;
 import org.junit.Test;
 
 /**
  * Test cases to exercise the {@link EnviHeaderParser}.
  */
-public class EnviHeaderParserTest {
+public class EnviHeaderParserTest extends TikaTest {
     @Test
     public void testParseGlobalMetadata() throws Exception {
         if (System.getProperty("java.version").startsWith("1.5")) {
             return;
         }
 
-        Parser parser = new EnviHeaderParser();
-        ToXMLContentHandler handler = new ToXMLContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = EnviHeaderParser.class.getResourceAsStream(
-                "/test-documents/envi_test_header.hdr")) {
-            assertNotNull("Test ENVI file not found", stream);
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
+        XMLResult r = getXML("envi_test_header.hdr", new EnviHeaderParser());
         // Check content of test file
-        String content = handler.toString();
-        assertContains("<body><p>ENVI</p>", content);
-        assertContains("<p>samples = 2400</p>", content);
-        assertContains("<p>lines   = 2400</p>", content);
-        assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", content);
-        assertContains("content=\"application/envi.hdr\"", content);
-        assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", content);
+        assertContains("<body><p>ENVI</p>", r.xml);
+        assertContains("<p>samples = 2400</p>", r.xml);
+        assertContains("<p>lines   = 2400</p>", r.xml);
+        assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", r.xml);
+        assertContains("content=\"application/envi.hdr\"", r.xml);
+        assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", r.xml);
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
index cf37989..5d4c58c 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
@@ -49,7 +49,7 @@ public class TestGDALParser extends TikaTest {
     }
 
   @Test
-  public void testParseBasicInfo() {
+  public void testParseBasicInfo() throws Exception {
     assumeTrue(canRun());
     final String expectedDriver = "netCDF/Network Common Data Format";
     final String expectedUpperRight = "512.0,    0.0";
@@ -59,18 +59,9 @@ public class TestGDALParser extends TikaTest {
     final String expectedCoordinateSystem = "`'";
     final String expectedSize = "512, 512";
 
-    GDALParser parser = new GDALParser();
-    InputStream stream = TestGDALParser.class
-        .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
-    Metadata met = new Metadata();
-    BodyContentHandler handler = new BodyContentHandler();
-    try {
-      parser.parse(stream, handler, met, new ParseContext());
-    } catch (Exception e) {
-      e.printStackTrace();
-      fail(e.getMessage());
-    }
 
+    XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc", new GDALParser());
+    Metadata met = r.metadata;
     assertNotNull(met);
     assertNotNull(met.get("Driver"));
     assertEquals(expectedDriver, met.get("Driver"));
@@ -91,7 +82,7 @@ public class TestGDALParser extends TikaTest {
   }
 
     @Test
-    public void testParseMetadata() {
+    public void testParseMetadata() throws Exception {
         assumeTrue(canRun());
         final String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)";
         final String expectedModelNameEnglish = "NCAR CCSM";
@@ -102,14 +93,10 @@ public class TestGDALParser extends TikaTest {
         final String expectedSub8Name = "\":ua";
         final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit floating-point)";
 
-        GDALParser parser = new GDALParser();
-        InputStream stream = TestGDALParser.class
-                .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
-        Metadata met = new Metadata();
-        BodyContentHandler handler = new BodyContentHandler();
-        try {
-            parser.parse(stream, handler, met, new ParseContext());
-            assertNotNull(met);
+        XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc");
+        Metadata met = r.metadata;
+
+        assertNotNull(met);
             assertNotNull(met.get("NC_GLOBAL#institution"));
             assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution"));
             assertNotNull(met.get("NC_GLOBAL#model_name_english"));
@@ -129,14 +116,11 @@ public class TestGDALParser extends TikaTest {
             assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
             assertNotNull(met.get("SUBDATASET_8_DESC"));
             assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
-        } catch (Exception e) {
-            e.printStackTrace();
-            fail(e.getMessage());
-        }
     }
 
     @Test
     public void testParseFITS() {
+        //TODO: fix this...add spooling to tmp file to TikaTest
         String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
 
         assumeTrue(canRun());

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java
index 0d6fb74..0fbe7b3 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java
@@ -21,25 +21,30 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
-import org.junit.Test;
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
 import org.xml.sax.SAXException;
 
-public class GeoParserTest {
+public class GeoParserTest extends TikaTest {
 	private Parser geoparser = new GeoParser();
 
 	@Test
-	public void testFunctions() throws UnsupportedEncodingException,
-			IOException, SAXException, TikaException {
+	public void testFunctions() throws Exception {
+
+		/* if it's not available no tests to run */
+		if (!((GeoParser) geoparser).isAvailable())
+			return;
+
 		String text = "The millennial-scale cooling trend that followed the HTM coincides with the decrease in China "
 				+ "summer insolation driven by slow changes in Earth's orbit. Despite the nearly linear forcing, the transition from the HTM to "
 				+ "the Little Ice Age (1500-1900 AD) was neither gradual nor uniform. To understand how feedbacks and perturbations result in rapid changes, "
@@ -53,13 +58,7 @@ public class GeoParserTest {
 		GeoParserConfig config = new GeoParserConfig();
 		context.set(GeoParserConfig.class, config);
 
-		InputStream s = new ByteArrayInputStream(text.getBytes(UTF_8));
-		/* if it's not available no tests to run */
-		if (!((GeoParser) geoparser).isAvailable())
-			return;
-
-		geoparser.parse(s, new BodyContentHandler(), metadata, context);
-
+		XMLResult r = getXML(new ByteArrayInputStream(text.getBytes(UTF_8)), geoparser, metadata, context);
 		assertNotNull(metadata.get("Geographic_NAME"));
 		assertNotNull(metadata.get("Geographic_LONGITUDE"));
 		assertNotNull(metadata.get("Geographic_LATITUDE"));

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java
index acd0cb2..442b080 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java
@@ -17,45 +17,29 @@
 
 package org.apache.tika.parser.geoinfo;
 
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.geoinfo.GeographicInformationParser;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import java.io.*;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
 
-public class GeographicInformationParserTest {
+public class GeographicInformationParserTest extends TikaTest {
 
     @Test
-    public void testISO19139() throws Exception{
-        String path ="/test-documents/sampleFile.iso19139";
-		
-        Metadata metadata = new Metadata();
-        Parser parser=new org.apache.tika.parser.geoinfo.GeographicInformationParser();
-        ContentHandler contentHandler=new BodyContentHandler();
-        ParseContext parseContext=new ParseContext();
-        
-        InputStream inputStream = GeographicInformationParser.class.getResourceAsStream(path);
-       
-        parser.parse(inputStream, contentHandler, metadata, parseContext);
-
-        assertEquals("text/iso19139+xml", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("UTF-8", metadata.get("CharacterSet"));
-        assertEquals("https", metadata.get("TransferOptionsOnlineProtocol "));
-        assertEquals("browser", metadata.get("TransferOptionsOnlineProfile "));
-        assertEquals("Barrow Atqasuk ARCSS Plant", metadata.get("TransferOptionsOnlineName "));
-
-        String content = contentHandler.toString();
-        assertTrue(content.contains("Barrow Atqasuk ARCSS Plant"));
-        assertTrue(content.contains("GeographicElementWestBoundLatitude	-157.24"));
-        assertTrue(content.contains("GeographicElementEastBoundLatitude	-156.4"));
-        assertTrue(content.contains("GeographicElementNorthBoundLatitude	71.18"));
-        assertTrue(content.contains("GeographicElementSouthBoundLatitude	70.27"));
+    public void testISO19139() throws Exception {
+        XMLResult r = getXML("sampleFile.iso19139", new GeographicInformationParser());
+        assertEquals("text/iso19139+xml", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("UTF-8", r.metadata.get("CharacterSet"));
+        assertEquals("https", r.metadata.get("TransferOptionsOnlineProtocol "));
+        assertEquals("browser", r.metadata.get("TransferOptionsOnlineProfile "));
+        assertEquals("Barrow Atqasuk ARCSS Plant", r.metadata.get("TransferOptionsOnlineName "));
+
+        assertContains("Barrow Atqasuk ARCSS Plant", r.xml);
+        assertContains("<td>GeographicElementWestBoundLatitude</td>\t<td>-157.24</td>", r.xml);
+        assertContains("<td>GeographicElementEastBoundLatitude</td>\t<td>-156.4</td>", r.xml);
+        assertContains("<td>GeographicElementNorthBoundLatitude</td>\t<td>71.18</td>", r.xml);
+        assertContains("<td>GeographicElementSouthBoundLatitude</td>\t<td>70.27</td>", r.xml);
 
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java
index 6ccf6af..622d511 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java
@@ -18,36 +18,24 @@
 package org.apache.tika.parser.grib;
 
 //JDK imports
-import static org.junit.Assert.*;
-import java.io.InputStream;
+import static org.junit.Assert.assertNotNull;
 
-//TIKA imports
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.TikaTest;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import java.io.File;
+
+//TIKA imports
 /**
  * Test cases to exercise the {@link org.apache.tika.parser.grib.GribParser}.
  */
 
-public class GribParserTest {
+public class GribParserTest extends TikaTest {
 
     @Test
     public void testParseGlobalMetadata() throws Exception {
-        Parser parser = new GribParser();
-        Metadata metadata = new Metadata();
-        ContentHandler handler = new BodyContentHandler();
-        try (InputStream stream = GribParser.class.getResourceAsStream("/test-documents/gdas1.forecmwf.2014062612.grib2")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-        assertNotNull(metadata);
-        String content = handler.toString();
-        assertTrue(content.contains("dimensions:"));
-        assertTrue(content.contains("variables:"));
+        XMLResult r = getXML("gdas1.forecmwf.2014062612.grib2", new GribParser());
+        assertNotNull(r.metadata);
+        assertContains("dimensions:", r.xml);
+        assertContains("variables:", r.xml);
     }
 }
  

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
index 9bda875..1ee4dc7 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
@@ -17,39 +17,27 @@
 package org.apache.tika.parser.hdf;
 
 //JDK imports
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
-import java.io.InputStream;
-
-
-
+import org.apache.tika.TikaTest;
+import org.junit.Test;
 
 //TIKA imports
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.hdf.HDFParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
 /**
  * 
  * Test suite for the {@link HDFParser}.
  * 
  */
-public class HDFParserTest {
+public class HDFParserTest extends TikaTest {
 
     @Test
     public void testParseGlobalMetadata() throws Exception {
         if(System.getProperty("java.version").startsWith("1.5")) {
             return;
         }
-        Parser parser = new HDFParser();
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
         /*
          * this is a publicly available HDF5 file from the MLS mission:
          * 
@@ -57,12 +45,10 @@ public class HDFParserTest {
          * ftp://acdisc.gsfc.nasa.gov/data/s4pa///Aura_MLS_Level2/ML2O3.002//2009
          * /MLS-Aura_L2GP-O3_v02-23-c01_2009d122.he5
          */
-        try (InputStream stream = HDFParser.class.getResourceAsStream("/test-documents/test.he5")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
 
-        assertNotNull(metadata);
-        assertEquals("5", metadata.get("GranuleMonth"));
+        XMLResult r = getXML("test.he5", new HDFParser());
+        assertNotNull(r.metadata);
+        assertEquals("5", r.metadata.get("GranuleMonth"));
     }
 
     @Test
@@ -70,23 +56,17 @@ public class HDFParserTest {
        if(System.getProperty("java.version").startsWith("1.5")) {
           return;
       }
-      Parser parser = new HDFParser();
-      ContentHandler handler = new BodyContentHandler();
-      Metadata metadata = new Metadata();
 
       /*
        * this is a publicly available HDF4 file from the HD4 examples:
        * 
        * http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf
        */
-        try (InputStream stream = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-      assertNotNull(metadata);
-      assertEquals("Direct read of HDF4 file through CDM library", metadata.get("_History"));
-      assertEquals("Ascending", metadata.get("Pass"));
+      XMLResult r = getXML("test.hdf", new HDFParser());
+      assertNotNull(r.metadata);
+      assertEquals("Direct read of HDF4 file through CDM library", r.metadata.get("_History"));
+      assertEquals("Ascending", r.metadata.get("Pass"));
       assertEquals("Hierarchical Data Format, version 4",
-      metadata.get("File-Type-Description"));
+      r.metadata.get("File-Type-Description"));
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java
index ce4299c..fcc71f5 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java
@@ -17,44 +17,72 @@
 
 package org.apache.tika.parser.isatab;
 
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
 
-import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
+import org.apache.tika.TikaTest;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
-public class ISArchiveParserTest {
+public class ISArchiveParserTest extends TikaTest {
+
+	static Path tmpDir;
+    final static String ISA_SUBDIR = "testISATab_BII-I-1";
+    final static String[] ISA_FILES = {
+            "a_bii-s-2_metabolite profiling_NMR spectroscopy.txt",
+            "a_metabolome.txt",
+            "a_microarray.txt",
+            "a_proteome.txt",
+            "a_transcriptome.txt",
+            "i_investigation.txt"
+    };
+
+    @BeforeClass
+	public static void createTempDir() throws Exception {
+        tmpDir = Files.createTempDirectory(ISA_SUBDIR);
+        for (String isaFile : ISA_FILES) {
+            String isaPath = "test-documents/"+ISA_SUBDIR+"/"+isaFile;
+            Files.copy(ISArchiveParserTest.class.getClassLoader().getResourceAsStream(isaPath),
+                    tmpDir.resolve(isaFile));
+        }
+    }
+	@AfterClass
+    public static void deleteTempDir() throws Exception {
+        for (String isaFile : ISA_FILES) {
+            Path p = tmpDir.resolve(isaFile);
+            Files.delete(p);
+        }
+        Files.delete(tmpDir);
+    }
 
 	@Test
 	public void testParseArchive() throws Exception {
-		String path = "/test-documents/testISATab_BII-I-1/s_BII-S-1.txt";
-		
-		Parser parser = new ISArchiveParser(ISArchiveParserTest.class.getResource("/test-documents/testISATab_BII-I-1/").toURI().getPath());
-		//Parser parser = new AutoDetectParser();
-		
-		ContentHandler handler = new BodyContentHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		try (InputStream stream = ISArchiveParserTest.class.getResourceAsStream(path)) {
-			parser.parse(stream, handler, metadata, context);
-		}
-		
+
+		Parser parser = new ISArchiveParser(tmpDir.toString());
+		XMLResult r = getXML(ISA_SUBDIR+"/s_BII-S-1.txt",
+					parser);
+
 		// INVESTIGATION
-		assertEquals("Invalid Investigation Identifier", "BII-I-1", metadata.get("Investigation Identifier"));
-		assertEquals("Invalid Investigation Title", "Growth control of the eukaryote cell: a systems biology study in yeast", metadata.get("Investigation Title"));
+		assertEquals("Invalid Investigation Identifier", "BII-I-1",
+				r.metadata.get("Investigation Identifier"));
+		assertEquals("Invalid Investigation Title",
+				"Growth control of the eukaryote cell: a systems biology study in yeast",
+				r.metadata.get("Investigation Title"));
 		
 		// INVESTIGATION PUBLICATIONS
-		assertEquals("Invalid Investigation PubMed ID", "17439666", metadata.get("Investigation PubMed ID")); 
-		assertEquals("Invalid Investigation Publication DOI", "doi:10.1186/jbiol54", metadata.get("Investigation Publication DOI"));
+		assertEquals("Invalid Investigation PubMed ID", "17439666",
+				r.metadata.get("Investigation PubMed ID"));
+		assertEquals("Invalid Investigation Publication DOI", "doi:10.1186/jbiol54",
+				r.metadata.get("Investigation Publication DOI"));
 		
 		// INVESTIGATION CONTACTS
-		assertEquals("Invalid Investigation Person Last Name", "Oliver", metadata.get("Investigation Person Last Name")); 
-		assertEquals("Invalid Investigation Person First Name", "Stephen", metadata.get("Investigation Person First Name"));
+		assertEquals("Invalid Investigation Person Last Name", "Oliver",
+				r.metadata.get("Investigation Person Last Name"));
+		assertEquals("Invalid Investigation Person First Name", "Stephen",
+				r.metadata.get("Investigation Person First Name"));
 	}
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java
index 0b31fea..aee5d62 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java
@@ -16,65 +16,39 @@
  */
 package org.apache.tika.parser.mat;
 
-import static org.apache.tika.TikaTest.assertContains;
 import static org.junit.Assert.assertEquals;
 
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.ToXMLContentHandler;
+import org.apache.tika.TikaTest;
 import org.junit.Test;
 
 /**
  * Test cases to exercise the {@link MatParser}.
  */
-public class MatParserTest {
+public class MatParserTest extends TikaTest {
     @Test
     public void testParser() throws Exception {
-        AutoDetectParser parser = new AutoDetectParser();
-        ToXMLContentHandler handler = new ToXMLContentHandler();
-        Metadata metadata = new Metadata();
-        String path = "/test-documents/breidamerkurjokull_radar_profiles_2009.mat";
-
-        try (InputStream stream = MatParser.class.getResourceAsStream(path)) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
 
+        XMLResult r = getXML("breidamerkurjokull_radar_profiles_2009.mat");
         // Check Metadata
-        assertEquals("PCWIN64", metadata.get("platform"));
-        assertEquals("MATLAB 5.0 MAT-file", metadata.get("fileType"));
-        assertEquals("IM", metadata.get("endian"));
-        assertEquals("Thu Feb 21 15:52:49 2013", metadata.get("createdOn"));
+        assertEquals("PCWIN64", r.metadata.get("platform"));
+        assertEquals("MATLAB 5.0 MAT-file", r.metadata.get("fileType"));
+        assertEquals("IM", r.metadata.get("endian"));
+        assertEquals("Thu Feb 21 15:52:49 2013", r.metadata.get("createdOn"));
 
         // Check Content
-        String content = handler.toString();
-
-        assertContains("<li>[1x909  double array]</li>", content);
-        assertContains("<p>c1:[1x1  struct array]</p>", content);
-        assertContains("<li>[1024x1  double array]</li>", content);
-        assertContains("<p>b1:[1x1  struct array]</p>", content);
-        assertContains("<p>a1:[1x1  struct array]</p>", content);
-        assertContains("<li>[1024x1261  double array]</li>", content);
-        assertContains("<li>[1x1  double array]</li>", content);
-        assertContains("</body></html>", content);
+        assertContains("<li>[1x909  double array]</li>", r.xml);
+        assertContains("<p>c1:[1x1  struct array]</p>", r.xml);
+        assertContains("<li>[1024x1  double array]</li>", r.xml);
+        assertContains("<p>b1:[1x1  struct array]</p>", r.xml);
+        assertContains("<p>a1:[1x1  struct array]</p>", r.xml);
+        assertContains("<li>[1024x1261  double array]</li>", r.xml);
+        assertContains("<li>[1x1  double array]</li>", r.xml);
+        assertContains("</body></html>", r.xml);
     }
 
     @Test
     public void testParserForText() throws Exception {
-        Parser parser = new MatParser();
-        ToXMLContentHandler handler = new ToXMLContentHandler();
-        Metadata metadata = new Metadata();
-        String path = "/test-documents/test_mat_text.mat";
-
-        try (InputStream stream = MatParser.class.getResourceAsStream(path)) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        // Check Content
-        String content = handler.toString();
-        assertContains("<p>double:[2x2  double array]</p>", content);
+        XMLResult r = getXML("test_mat_text.mat", new MatParser());
+        assertContains("<p>double:[2x2  double array]</p>", r.xml);
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
index 3cc1df8..7d0f2e8 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
@@ -17,54 +17,42 @@
 package org.apache.tika.parser.netcdf;
 
 //JDK imports
-import java.io.InputStream;
 
-//TIKA imports
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
+//TIKA imports
 
 /**
  * Test cases to exercise the {@link NetCDFParser}.
  */
-public class NetCDFParserTest {
+public class NetCDFParserTest extends TikaTest {
 
     @Test
     public void testParseGlobalMetadata() throws Exception {
-        Parser parser = new NetCDFParser();
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = NetCDFParser.class
-                .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
 
-        assertEquals(metadata.get(TikaCoreProperties.TITLE),
+        XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc", new NetCDFParser());
+        assertEquals(r.metadata.get(TikaCoreProperties.TITLE),
                 "model output prepared for IPCC AR4");
-        assertEquals(metadata.get(Metadata.CONTACT), "ccsm@ucar.edu");
-        assertEquals(metadata.get(Metadata.PROJECT_ID),
+        assertEquals(r.metadata.get(Metadata.CONTACT), "ccsm@ucar.edu");
+        assertEquals(r.metadata.get(Metadata.PROJECT_ID),
                 "IPCC Fourth Assessment");
-        assertEquals(metadata.get(Metadata.CONVENTIONS), "CF-1.0");
-        assertEquals(metadata.get(Metadata.REALIZATION), "1");
-        assertEquals(metadata.get(Metadata.EXPERIMENT_ID),
+        assertEquals(r.metadata.get(Metadata.CONVENTIONS), "CF-1.0");
+        assertEquals(r.metadata.get(Metadata.REALIZATION), "1");
+        assertEquals(r.metadata.get(Metadata.EXPERIMENT_ID),
                 "720 ppm stabilization experiment (SRESA1B)");
-        assertEquals(metadata.get("File-Type-Description"), 
+        assertEquals(r.metadata.get("File-Type-Description"),
                 "NetCDF-3/CDM");
 
-        String content = handler.toString();
-        assertContains("long_name = \"Surface area\"", content);
-        assertContains("float area(lat=128, lon=256)", content);
-        assertContains("float lat(lat=128)", content);
-        assertContains("double lat_bnds(lat=128, bnds=2)", content);
-        assertContains("double lon_bnds(lon=256, bnds=2)", content);
+        assertContains("long_name = \"Surface area\"", r.xml);
+        assertContains("float area(lat=128, lon=256)", r.xml);
+        assertContains("float lat(lat=128)", r.xml);
+        assertContains("double lat_bnds(lat=128, bnds=2)", r.xml);
+        assertContains("double lon_bnds(lon=256, bnds=2)", r.xml);
         
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java
index 5f197d2..1c5b2db 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java
@@ -14,21 +14,19 @@
 package org.apache.tika.parser.strings;
 
 import static org.apache.tika.parser.strings.StringsParser.getStringsProg;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assume.assumeTrue;
 
-import java.io.InputStream;
 import java.util.Arrays;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.external.ExternalParser;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
-public class StringsParserTest {
+public class StringsParserTest extends TikaTest {
 	public static boolean canRun() {
 		StringsConfig config = new StringsConfig();
 		String[] checkCmd = {config.getStringsPath() + getStringsProg(), "--version"};
@@ -40,7 +38,7 @@ public class StringsParserTest {
 	public void testParse() throws Exception {
 		assumeTrue(canRun());
 		
-		String resource = "/test-documents/testOCTET_header.dbase3";
+		String resource = "testOCTET_header.dbase3";
 
 		String[] content = { "CLASSNO", "TITLE", "ITEMNO", "LISTNO", "LISTDATE" };
 		
@@ -50,22 +48,15 @@ public class StringsParserTest {
 		FileConfig fileConfig = new FileConfig();
 
 		Parser parser = new StringsParser();
-		ContentHandler handler = new BodyContentHandler();
-		Metadata metadata = new Metadata();
-
 		ParseContext context = new ParseContext();
 		context.set(StringsConfig.class, stringsConfig);
 		context.set(FileConfig.class, fileConfig);
-
-		try (InputStream stream = StringsParserTest.class.getResourceAsStream(resource)) {
-			parser.parse(stream, handler, metadata, context);
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
+		Metadata metadata = new Metadata();
+		XMLResult r = getXML(resource, parser, metadata, context);
 
 		// Content
 		for (String word : content) {
-			assertTrue(handler.toString().contains(word));
+			assertTrue(r.xml.contains(word));
 		}
 		
 		// Metadata

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
index 050ef15..9064597 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
@@ -23,13 +23,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 
+import org.apache.tika.TikaTest;
 import org.junit.Test;
 
-public class CharsetDetectorTest {
+public class CharsetDetectorTest extends TikaTest {
 
     @Test
     public void testTagDropper() throws IOException {
-        try (InputStream in = CharsetDetectorTest.class.getResourceAsStream("/test-documents/resume.html")) {
+        try (InputStream in = getTestDocumentAsStream("resume.html")) {
             CharsetDetector detector = new CharsetDetector();
             detector.enableInputFilter(true);
             detector.setText(in);
@@ -52,7 +53,7 @@ public class CharsetDetectorTest {
 
     @Test
     public void testEmptyOrNullDeclaredCharset() throws IOException {
-        try (InputStream in = CharsetDetectorTest.class.getResourceAsStream("/test-documents/resume.html")) {
+        try (InputStream in = getTestDocumentAsStream("resume.html")) {
             CharsetDetector detector = new CharsetDetector();
             Reader reader = detector.getReader(in, null);
             assertTrue(reader.ready());

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
index 3de5eac..6d1c99a 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
@@ -18,13 +18,13 @@ package org.apache.tika.parser.txt;
 
 import static java.nio.charset.StandardCharsets.ISO_8859_1;
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.tika.TikaTest.assertContains;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
 import java.io.ByteArrayInputStream;
 import java.io.StringWriter;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
@@ -35,7 +35,7 @@ import org.junit.Test;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.helpers.DefaultHandler;
 
-public class TXTParserTest {
+public class TXTParserTest extends TikaTest {
 
     private Parser parser = new TXTParser();
 
@@ -233,39 +233,21 @@ public class TXTParserTest {
 
     @Test
     public void testCP866() throws Exception {
-        Metadata metadata = new Metadata();
-        StringWriter writer = new StringWriter();
-        parser.parse(
-                TXTParserTest.class.getResourceAsStream("/test-documents/russian.cp866.txt"),
-                new WriteOutContentHandler(writer),
-                metadata,
-                new ParseContext());
-
-        assertEquals("text/plain; charset=IBM866", metadata.get(Metadata.CONTENT_TYPE));
+        XMLResult r = getXML("russian.cp866.txt", parser);
+        assertEquals("text/plain; charset=IBM866", r.metadata.get(Metadata.CONTENT_TYPE));
     }
 
     @Test
     public void testEBCDIC_CP500() throws Exception {
-        Metadata metadata = new Metadata();
-        StringWriter writer = new StringWriter();
-        parser.parse(
-                TXTParserTest.class.getResourceAsStream("/test-documents/english.cp500.txt"),
-                new WriteOutContentHandler(writer),
-                metadata,
-                new ParseContext());
-
-        assertEquals("text/plain; charset=IBM500", metadata.get(Metadata.CONTENT_TYPE));
+        XMLResult r = getXML("english.cp500.txt", parser);
+        assertEquals("text/plain; charset=IBM500", r.metadata.get(Metadata.CONTENT_TYPE));
 
         // Additional check that it isn't too eager on short blocks of text
-        metadata = new Metadata();
-        writer = new StringWriter();
-        parser.parse(
+        r = getXML(
                 new ByteArrayInputStream("<html><body>hello world</body></html>".getBytes(ISO_8859_1)),
-                new WriteOutContentHandler(writer),
-                metadata,
-                new ParseContext());
+                parser, new Metadata());
 
-        assertEquals("text/plain; charset=ISO-8859-1", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("text/plain; charset=ISO-8859-1", r.metadata.get(Metadata.CONTENT_TYPE));
     }
 
     /**
@@ -276,20 +258,17 @@ public class TXTParserTest {
     @Test
     public void testCharsetDetectionWithShortSnipet() throws Exception {
         final String text = "Hello, World!";
-
-        Metadata metadata = new Metadata();
-        parser.parse(
-                new ByteArrayInputStream(text.getBytes(UTF_8)),
-                new BodyContentHandler(), metadata, new ParseContext());
-        assertEquals("text/plain; charset=ISO-8859-1", metadata.get(Metadata.CONTENT_TYPE));
+        XMLResult r = getXML(
+                new ByteArrayInputStream(text.getBytes(UTF_8)), parser, new Metadata());
+        assertEquals("text/plain; charset=ISO-8859-1", r.metadata.get(Metadata.CONTENT_TYPE));
 
         // Now verify that if we tell the parser the encoding is UTF-8, that's what
         // we get back (see TIKA-868)
-        metadata.set(Metadata.CONTENT_TYPE, "application/binary; charset=UTF-8");
+        r.metadata.set(Metadata.CONTENT_TYPE, "application/binary; charset=UTF-8");
         parser.parse(
                 new ByteArrayInputStream(text.getBytes(UTF_8)),
-                new BodyContentHandler(), metadata, new ParseContext());
-        assertEquals("text/plain; charset=UTF-8", metadata.get(Metadata.CONTENT_TYPE));
+                new BodyContentHandler(), r.metadata, new ParseContext());
+        assertEquals("text/plain; charset=UTF-8", r.metadata.get(Metadata.CONTENT_TYPE));
     }
 
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
index 22094f4..665151d 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
@@ -20,26 +20,17 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.InputStream;
-
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.helpers.DefaultHandler;
 
 public class DcXMLParserTest extends TikaTest {
 
     @Test
     public void testXMLParserAsciiChars() throws Exception {
-        try (InputStream input = DcXMLParserTest.class.getResourceAsStream(
-                "/test-documents/testXML.xml")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DcXMLParser().parse(input, handler, metadata);
-
+        XMLResult result = getXML("testXML.xml", new DcXMLParser());
+        Metadata metadata = result.metadata;
             assertEquals(
                     "application/xml",
                     metadata.get(Metadata.CONTENT_TYPE));
@@ -74,22 +65,17 @@ public class DcXMLParserTest extends TikaTest {
             assertEquals("Fr", metadata.get(TikaCoreProperties.LANGUAGE));
             assertTrue(metadata.get(TikaCoreProperties.RIGHTS).contains("testing chars"));
 
-            String content = handler.toString();
-            assertContains("Tika test document", content);
+            assertContains("Tika test document", result.xml);
 
             assertEquals("2000-12-01T00:00:00.000Z", metadata.get(TikaCoreProperties.CREATED));
-        }
+
     }
     
     @Test
     public void testXMLParserNonAsciiChars() throws Exception {
-        try (InputStream input = DcXMLParserTest.class.getResourceAsStream("/test-documents/testXML.xml")) {
-            Metadata metadata = new Metadata();
-            new DcXMLParser().parse(input, new DefaultHandler(), metadata);
-
-            final String expected = "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9";
-            assertEquals(expected, metadata.get(TikaCoreProperties.RIGHTS));
-        }
+        XMLResult r = getXML("testXML.xml", new DcXMLParser());
+        final String expected = "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9";
+        assertEquals(expected, r.metadata.get(TikaCoreProperties.RIGHTS));
     }
 
     // TIKA-1048

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
index 20227a6..536f9d7 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
@@ -18,13 +18,10 @@ package org.apache.tika.parser.xml;
 
 import static org.junit.Assert.assertEquals;
 
-import java.io.InputStream;
-
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.TeeContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
@@ -38,52 +35,45 @@ public class EmptyAndDuplicateElementsXMLParserTest extends TikaTest {
 
     @Test
     public void testDefaultBehavior() throws Exception {
-        try (InputStream input = EmptyAndDuplicateElementsXMLParserTest.class.getResourceAsStream(
-                "/test-documents/testXML3.xml")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DefaultCustomXMLTestParser().parse(input, handler, metadata, new ParseContext());
+        XMLResult r = getXML("testXML3.xml", new DefaultCustomXMLTestParser());
+        Metadata metadata = r.metadata;
 
-            assertEquals(4, metadata.getValues(FIRST_NAME).length);
-            assertEquals(2, metadata.getValues(LAST_NAME).length);
+        assertEquals(4, metadata.getValues(FIRST_NAME).length);
+        assertEquals(2, metadata.getValues(LAST_NAME).length);
 
-            assertEquals("John", metadata.getValues(FIRST_NAME)[0]);
-            assertEquals("Smith", metadata.getValues(LAST_NAME)[0]);
+        assertEquals("John", metadata.getValues(FIRST_NAME)[0]);
+        assertEquals("Smith", metadata.getValues(LAST_NAME)[0]);
 
-            assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]);
-            assertEquals("Doe", metadata.getValues(LAST_NAME)[1]);
+        assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]);
+        assertEquals("Doe", metadata.getValues(LAST_NAME)[1]);
 
-            // We didn't know Bob's last name, but now we don't know an entry existed
-            assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]);
+        // We didn't know Bob's last name, but now we don't know an entry existed
+        assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]);
 
-            // We don't know Kate's last name because it was a duplicate
-            assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]);
-        }
+        // We don't know Kate's last name because it was a duplicate
+        assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]);
     }
     
     @Test
     public void testEmptiesAndRepeats() throws Exception {
-        try (InputStream input = EmptyAndDuplicateElementsXMLParserTest.class.getResourceAsStream(
-                "/test-documents/testXML3.xml")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new AllowEmptiesAndDuplicatesCustomXMLTestParser().parse(input, handler, metadata, new ParseContext());
+        XMLResult r = getXML("testXML3.xml", new AllowEmptiesAndDuplicatesCustomXMLTestParser());
+        Metadata metadata = r.metadata;
 
-            assertEquals(4, metadata.getValues(FIRST_NAME).length);
-            assertEquals(4, metadata.getValues(LAST_NAME).length);
+        assertEquals(4, metadata.getValues(FIRST_NAME).length);
+        assertEquals(4, metadata.getValues(LAST_NAME).length);
 
-            assertEquals("John", metadata.getValues(FIRST_NAME)[0]);
-            assertEquals("Smith", metadata.getValues(LAST_NAME)[0]);
+        assertEquals("John", metadata.getValues(FIRST_NAME)[0]);
+        assertEquals("Smith", metadata.getValues(LAST_NAME)[0]);
 
-            assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]);
-            assertEquals("Doe", metadata.getValues(LAST_NAME)[1]);
+        assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]);
+        assertEquals("Doe", metadata.getValues(LAST_NAME)[1]);
 
-            assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]);
-            assertEquals("", metadata.getValues(LAST_NAME)[2]);
+        assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]);
+        assertEquals("", metadata.getValues(LAST_NAME)[2]);
+
+        assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]);
+        assertEquals("Smith", metadata.getValues(LAST_NAME)[3]);
 
-            assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]);
-            assertEquals("Smith", metadata.getValues(LAST_NAME)[3]);
-        }
     }
     
     private class DefaultCustomXMLTestParser extends XMLParser {

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java
index 62454fa..aee7307 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java
@@ -16,38 +16,29 @@
  */
 package org.apache.tika.parser.xml;
 
-import static org.apache.tika.TikaTest.assertContains;
 import static org.junit.Assert.assertEquals;
 
 import java.io.InputStream;
 
-import org.apache.tika.TikaTest.TrackingHandler;
+import org.apache.tika.TikaTest;
 import org.apache.tika.extractor.ContainerExtractor;
 import org.apache.tika.extractor.ParserContainerExtractor;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
-import org.xml.sax.ContentHandler;
 
-public class FictionBookParserTest {
+public class FictionBookParserTest extends TikaTest {
   
     @Test
     public void testFB2() throws Exception {
-        try (InputStream input = FictionBookParserTest.class.getResourceAsStream("/test-documents/test.fb2")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new FictionBookParser().parse(input, handler, metadata, new ParseContext());
-            String content = handler.toString();
-
-            assertContains("1812", content);
-        }
+        XMLResult r = getXML("test.fb2", new FictionBookParser(), new Metadata(), new ParseContext());
+        assertContains("1812", r.xml);
     }
 
     @Test
     public void testEmbedded() throws Exception {
-        try (InputStream input = FictionBookParserTest.class.getResourceAsStream("/test-documents/test.fb2")) {
+        try (InputStream input = getTestDocumentAsStream("test.fb2")) {
             ContainerExtractor extractor = new ParserContainerExtractor();
             TikaInputStream stream = TikaInputStream.get(input);
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
deleted file mode 100644
index 67207d2..0000000
--- a/tika-parsers/pom.xml
+++ /dev/null
@@ -1,333 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parent</artifactId>
-    <version>2.0-SNAPSHOT</version>
-    <relativePath>../tika-parent/pom.xml</relativePath>
-  </parent>
-
-  <artifactId>tika-parsers</artifactId>
-  <packaging>bundle</packaging>
-  <name>Apache Tika parsers</name>
-  <url>http://tika.apache.org/</url>
-
-  <properties>
-    <vorbis.version>0.6</vorbis.version>
-  </properties>
-
-  <dependencies>
-    <!-- Optional OSGi dependency, used only when running within OSGi -->
-    <dependency>
-      <groupId>org.osgi</groupId>
-      <artifactId>org.osgi.core</artifactId>
-      <version>4.0.0</version>
-      <scope>provided</scope>
-      <optional>true</optional>
-    </dependency>
-
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-multimedia-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-advanced-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-cad-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-code-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-crypto-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-database-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-ebook-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-journal-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-office-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-package-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-pdf-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-scientific-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-text-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-web-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <!-- Optional OSGi dependencies, used only when running within OSGi -->
-    <dependency>
-      <groupId>org.apache.felix</groupId>
-      <artifactId>org.apache.felix.scr.annotations</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <!-- Externally Maintained Parsers -->
-    <dependency>
-      <groupId>org.gagravarr</groupId>
-      <artifactId>vorbis-java-tika</artifactId>
-      <version>${vorbis.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.gagravarr</groupId>
-      <artifactId>vorbis-java-core</artifactId>
-      <version>${vorbis.version}</version>
-    </dependency>
-
-    <!-- Test dependencies -->
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-test-resources</artifactId>
-      <version>${project.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.felix</groupId>
-        <artifactId>maven-bundle-plugin</artifactId>
-        <extensions>true</extensions>
-        <configuration>
-          <instructions>
-            <Bundle-DocURL>${project.url}</Bundle-DocURL>
-            <Bundle-Activator>
-              org.apache.tika.parser.internal.Activator
-            </Bundle-Activator>
-            <Import-Package>
-              org.w3c.dom,
-              org.apache.tika.*,
-              *;resolution:=optional
-            </Import-Package>
-          </instructions>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.rat</groupId>
-        <artifactId>apache-rat-plugin</artifactId>
-      </plugin>
-      
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.10</version>
-        <executions>
-          <execution>
-            <id>unpack</id>
-            <phase>compile</phase>
-            <goals>
-              <goal>unpack</goal>
-            </goals>
-            <configuration>
-              <artifactItems>
-                <artifactItem>
-                  <groupId>${project.groupId}</groupId>
-                  <artifactId>tika-test-resources</artifactId>
-                  <version>${project.version}</version>
-                  <type>test-jar</type>
-                  <overWrite>true</overWrite>
-                  <outputDirectory>${project.build.testOutputDirectory}</outputDirectory>
-                </artifactItem>
-              </artifactItems>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4.2</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <createDependencyReducedPom>
-                false
-              </createDependencyReducedPom>
-              <artifactSet>
-                <includes>
-                  <include>org.apache.tika:tika-parser-multimedia-module</include>
-                  <include>org.apache.tika:tika-parser-advanced-module</include>
-                  <include>org.apache.tika:tika-parser-cad-module</include>
-                  <include>org.apache.tika:tika-parser-code-module</include>
-                  <include>org.apache.tika:tika-parser-crypto-module</include>
-                  <include>org.apache.tika:tika-parser-database-module</include>
-                  <include>org.apache.tika:tika-parser-ebook-module</include>
-                  <include>org.apache.tika:tika-parser-journal-module</include>
-                  <include>org.apache.tika:tika-parser-office-module</include>
-                  <include>org.apache.tika:tika-parser-package-module</include>
-                  <include>org.apache.tika:tika-parser-pdf-module</include>
-                  <include>org.apache.tika:tika-parser-scientific-module</include>
-                  <include>org.apache.tika:tika-parser-text-module</include>
-                  <include>org.apache.tika:tika-parser-web-module</include>
-                </includes>
-              </artifactSet>
-              <transformers>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                  <resource>META-INF/services/org.apache.tika.detect.Detector</resource>
-                </transformer>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                  <resource>META-INF/services/org.apache.tika.detect.EncodingDetector</resource>
-                </transformer>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                  <resource>META-INF/services/org.apache.tika.parser.Parser</resource>
-                </transformer>
-              </transformers>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-    </plugins>
-
-    <pluginManagement>
-      <plugins>
-        <!-- This plugin's configuration is used to store Eclipse m2e      -->
-        <!-- settings only. It has no influence on the Maven build itself. -->
-        <plugin>
-          <groupId>org.eclipse.m2e</groupId>
-          <artifactId>lifecycle-mapping</artifactId>
-          <version>1.0.0</version>
-          <configuration>
-            <lifecycleMappingMetadata>
-              <pluginExecutions>
-                <pluginExecution>
-                  <pluginExecutionFilter>
-                    <groupId>org.apache.felix</groupId>
-                    <artifactId>maven-scr-plugin</artifactId>
-                    <versionRange>[1.7.2,)</versionRange>
-                    <goals>
-                      <goal>scr</goal>
-                    </goals>
-                  </pluginExecutionFilter>
-                  <action>
-                    <execute />
-                  </action>
-                </pluginExecution>
-              </pluginExecutions>
-            </lifecycleMappingMetadata>
-          </configuration>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-  </build>
-
-  <organization>
-    <name>The Apache Software Foundation</name>
-    <url>http://www.apache.org</url>
-  </organization>
-  <scm>
-    <url>http://svn.apache.org/viewvc/tika/trunk/tika-parsers</url>
-    <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-parsers</connection>
-    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-parsers</developerConnection>
-  </scm>
-  <issueManagement>
-    <system>JIRA</system>
-    <url>https://issues.apache.org/jira/browse/TIKA</url>
-  </issueManagement>
-  <ciManagement>
-    <system>Jenkins</system>
-    <url>https://builds.apache.org/job/Tika-trunk/</url>
-  </ciManagement>
-</project>


[04/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
deleted file mode 100644
index 4889b38..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ /dev/null
@@ -1,312 +0,0 @@
-package org.apache.tika.parser;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.parser.utils.CommonsDigester;
-import org.apache.tika.sax.BasicContentHandlerFactory;
-import org.apache.tika.sax.ContentHandlerFactory;
-import org.junit.Test;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class RecursiveParserWrapperTest {
-
-    @Test
-    public void testBasicXML() throws Exception {
-        List<Metadata> list = getMetadata(new Metadata(),
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
-        Metadata container = list.get(0);
-        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
-        //not much differentiates html from xml in this test file
-        assertTrue(content.indexOf("<p class=\"header\" />") > -1);
-    }
-
-    @Test
-    public void testBasicHTML() throws Exception {
-        List<Metadata> list = getMetadata(new Metadata(),
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.HTML, -1));
-        Metadata container = list.get(0);
-        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
-        //not much differentiates html from xml in this test file
-        assertTrue(content.indexOf("<p class=\"header\"></p>") > -1);
-    }
-
-    @Test
-    public void testBasicText() throws Exception {
-        List<Metadata> list = getMetadata(new Metadata(),
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
-        Metadata container = list.get(0);
-        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
-        assertTrue(content.indexOf("<p ") < 0);
-        assertTrue(content.indexOf("embed_0") > -1);
-    }
-
-    @Test
-    public void testIgnoreContent() throws Exception {
-        List<Metadata> list = getMetadata(new Metadata(),
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
-        Metadata container = list.get(0);
-        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
-        assertNull(content);
-    }
-
-
-    @Test
-    public void testCharLimit() throws Exception {
-        ParseContext context = new ParseContext();
-        Metadata metadata = new Metadata();
-
-        Parser wrapped = new AutoDetectParser();
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 60));
-        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
-                "/test-documents/test_recursive_embedded.docx");
-        wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        List<Metadata> list = wrapper.getMetadata();
-
-        assertEquals(5, list.size());
-
-        int wlr = 0;
-        for (Metadata m : list) {
-            String limitReached = m.get(RecursiveParserWrapper.WRITE_LIMIT_REACHED);
-            if (limitReached != null && limitReached.equals("true")) {
-                wlr++;
-            }
-        }
-        assertEquals(1, wlr);
-
-    }
-
-    @Test
-    public void testMaxEmbedded() throws Exception {
-        int maxEmbedded = 4;
-        int totalNoLimit = 12;//including outer container file
-        ParseContext context = new ParseContext();
-        Metadata metadata = new Metadata();
-        String limitReached = null;
-
-        Parser wrapped = new AutoDetectParser();
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
-
-        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
-                "/test-documents/test_recursive_embedded.docx");
-        wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        List<Metadata> list = wrapper.getMetadata();
-        //test default
-        assertEquals(totalNoLimit, list.size());
-
-        limitReached = list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED);
-        assertNull(limitReached);
-
-
-        wrapper.reset();
-        stream.close();
-
-        //test setting value
-        metadata = new Metadata();
-        stream = RecursiveParserWrapperTest.class.getResourceAsStream(
-                "/test-documents/test_recursive_embedded.docx");
-        wrapper.setMaxEmbeddedResources(maxEmbedded);
-        wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        list = wrapper.getMetadata();
-
-        //add 1 for outer container file
-        assertEquals(maxEmbedded + 1, list.size());
-
-        limitReached = list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED);
-        assertEquals("true", limitReached);
-
-        wrapper.reset();
-        stream.close();
-
-        //test setting value < 0
-        metadata = new Metadata();
-        stream = RecursiveParserWrapperTest.class.getResourceAsStream(
-                "/test-documents/test_recursive_embedded.docx");
-
-        wrapper.setMaxEmbeddedResources(-2);
-        wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        assertEquals(totalNoLimit, list.size());
-        limitReached = list.get(0).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_LIMIT_REACHED);
-        assertNull(limitReached);
-    }
-
-    @Test
-    public void testEmbeddedResourcePath() throws Exception {
-
-        Set<String> targets = new HashSet<String>();
-        targets.add("/embed1.zip");
-        targets.add("/embed1.zip/embed2.zip");
-        targets.add("/embed1.zip/embed2.zip/embed3.zip");
-        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
-        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
-        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
-        targets.add("/embed1.zip/embed2.zip/embed2a.txt");
-        targets.add("/embed1.zip/embed2.zip/embed2b.txt");
-        targets.add("/embed1.zip/embed1b.txt");
-        targets.add("/embed1.zip/embed1a.txt");
-        targets.add("/image1.emf");
-
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
-        List<Metadata> list = getMetadata(metadata,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
-        Metadata container = list.get(0);
-        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
-        assertTrue(content.indexOf("<p class=\"header\" />") > -1);
-
-        Set<String> seen = new HashSet<String>();
-        for (Metadata m : list) {
-            String path = m.get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH);
-            if (path != null) {
-                seen.add(path);
-            }
-        }
-        assertEquals(targets, seen);
-    }
-
-    @Test
-    public void testEmbeddedNPE() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
-        List<Metadata> list = getMetadata(metadata,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
-        //default behavior (user doesn't specify whether or not to catch embedded exceptions
-        //is to catch the exception
-        assertEquals(13, list.size());
-        Metadata mockNPEMetadata = list.get(10);
-        assertContains("java.lang.NullPointerException", mockNPEMetadata.get(RecursiveParserWrapper.EMBEDDED_EXCEPTION));
-
-        metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
-        list = getMetadata(metadata,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                false, null);
-
-        //Composite parser swallows caught TikaExceptions, IOExceptions and SAXExceptions
-        //and just doesn't bother to report that there was an exception.
-        assertEquals(12, list.size());
-    }
-
-    @Test
-    public void testPrimaryExcWEmbedded() throws Exception {
-        //if embedded content is handled and then
-        //the parser hits an exception in the container document,
-        //that the first element of the returned list is the container document
-        //and the second is the embedded content
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
-
-        ParseContext context = new ParseContext();
-        Parser wrapped = new AutoDetectParser();
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1), true);
-        String path = "/test-documents/mock/embedded_then_npe.xml";
-
-        InputStream stream = null;
-        boolean npe = false;
-        try {
-            stream = RecursiveParserWrapperTest.class.getResourceAsStream(
-                    path);
-            wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        } catch (TikaException e) {
-            if (e.getCause().getClass().equals(NullPointerException.class)) {
-                npe = true;
-            }
-        } finally {
-            IOUtils.closeQuietly(stream);
-        }
-        assertTrue("npe", npe);
-
-        List<Metadata> metadataList = wrapper.getMetadata();
-        assertEquals(2, metadataList.size());
-        Metadata outerMetadata = metadataList.get(0);
-        Metadata embeddedMetadata = metadataList.get(1);
-        assertContains("main_content", outerMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-        assertEquals("embedded_then_npe.xml", outerMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
-        assertEquals("Nikolai Lobachevsky", outerMetadata.get("author"));
-
-        assertContains("some_embedded_content", embeddedMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-        assertEquals("embed1.xml", embeddedMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
-        assertEquals("embeddedAuthor", embeddedMetadata.get("author"));
-    }
-
-    @Test
-    public void testDigesters() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
-        List<Metadata> list = getMetadata(metadata,
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                true, new CommonsDigester(100000, CommonsDigester.DigestAlgorithm.MD5));
-        int i = 0;
-        Metadata m0 = list.get(0);
-        Metadata m6 = list.get(6);
-        String md5Key = "X-TIKA:digest:MD5";
-        assertEquals("59f626e09a8c16ab6dbc2800c685f772", list.get(0).get(md5Key));
-        assertEquals("ccdf3882e7e4c2454e28884db9b0a54d", list.get(6).get(md5Key));
-        assertEquals("a869bf6432ebd14e19fc79416274e0c9", list.get(7).get(md5Key));
-    }
-
-    private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory,
-                                       boolean catchEmbeddedExceptions,
-                                       DigestingParser.Digester digester) throws Exception {
-        ParseContext context = new ParseContext();
-        Parser wrapped = new AutoDetectParser();
-        if (digester != null) {
-            wrapped = new DigestingParser(wrapped, digester);
-        }
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
-                contentHandlerFactory, catchEmbeddedExceptions);
-        String path = metadata.get(Metadata.RESOURCE_NAME_KEY);
-        if (path == null) {
-            path = "/test-documents/test_recursive_embedded.docx";
-        } else {
-            path = "/test-documents/" + path;
-        }
-        InputStream stream = null;
-        try {
-            stream = TikaInputStream.get(RecursiveParserWrapperTest.class.getResource(path).toURI());
-            wrapper.parse(stream, new DefaultHandler(), metadata, context);
-        } finally {
-            IOUtils.closeQuietly(stream);
-        }
-        return wrapper.getMetadata();
-
-    }
-
-    private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory)
-            throws Exception {
-        return getMetadata(metadata, contentHandlerFactory, true, null);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
deleted file mode 100644
index 54c1427..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.fork;
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.NotSerializableException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.tika.Tika;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.fork.ForkParser;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Test that the ForkParser correctly behaves when
- *  wired in to the regular Parsers and their test data
- */
-public class ForkParserIntegrationTest {
-
-    private Tika tika = new Tika(); // TODO Use TikaConfig instead, when it works
-
-    /**
-     * Simple text parsing
-     */
-    @Test
-    public void testForkedTextParsing() throws Exception {
-        ForkParser parser = new ForkParser(
-                ForkParserIntegrationTest.class.getClassLoader(),
-                tika.getParser());
-
-       try {
-          ContentHandler output = new BodyContentHandler();
-          InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
-                  "/test-documents/testTXT.txt");
-          ParseContext context = new ParseContext();
-          parser.parse(stream, output, new Metadata(), context);
-
-          String content = output.toString();
-          assertContains("Test d'indexation", content);
-          assertContains("http://www.apache.org", content);
-       } finally {
-          parser.close();
-       }
-    }
-   
-    /**
-     * This error has a message and an equals() implementation as to be able 
-     * to match it against the serialized version of itself.
-     */
-    static class AnError extends Error {
-        private static final long serialVersionUID = -6197267350768803348L;
-        private String message;
-        AnError(String message) {
-            super(message);
-            this.message = message;
-        }
-
-        @Override
-        public boolean equals(Object o) {
-            if (this == o) return true;
-            if (o == null || getClass() != o.getClass()) return false;
-
-            AnError anError = (AnError) o;
-
-            if (!message.equals(anError.message)) return false;
-
-            return true;
-        }
-
-        @Override
-        public int hashCode() {
-            return message.hashCode();
-        }
-    }
-    
-    /**
-     * This error isn't serializable on the server, so can't be sent back
-     *  to the Fork Client once it has occured
-     */
-    static class WontBeSerializedError extends RuntimeException {
-       private static final long serialVersionUID = 1L;
-
-       WontBeSerializedError(String message) {
-          super(message);
-       }
-
-       private void writeObject(java.io.ObjectOutputStream out) {
-          RuntimeException e = new RuntimeException("Bang!");
-          boolean found = false;
-          for (StackTraceElement ste : e.getStackTrace()) {
-             if (ste.getClassName().equals(ForkParser.class.getName())) {
-                found = true;
-                break;
-             }
-          }
-          if (!found) {
-             throw e;
-          }
-       }
-    }
-    
-    static class BrokenParser implements Parser {
-        private static final long serialVersionUID = 995871497930817839L;
-        public Error err = new AnError("Simulated fail");
-        public RuntimeException re = null;
-        
-        public Set<MediaType> getSupportedTypes(ParseContext context) {
-            return new HashSet<MediaType>(Arrays.asList(MediaType.TEXT_PLAIN));
-        }
-
-        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
-            if (re != null) throw re;
-            throw err;
-        }
-    }
-    
-    /**
-     * TIKA-831 Parsers throwing errors should be caught and
-     *  properly reported
-     */
-    @Test
-    public void testParsingErrorInForkedParserShouldBeReported() throws Exception {
-        BrokenParser brokenParser = new BrokenParser();
-        Parser parser = new ForkParser(ForkParser.class.getClassLoader(), brokenParser);
-        InputStream stream = getClass().getResourceAsStream("/test-documents/testTXT.txt");
-        
-        // With a serializable error, we'll get that back
-        try {
-            ContentHandler output = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-            parser.parse(stream, output, new Metadata(), context);
-            fail("Expected TikaException caused by Error");
-        } catch (TikaException e) {
-            assertEquals(brokenParser.err, e.getCause());
-        }
-        
-        // With a non serializable one, we'll get something else
-        // TODO Fix this test
-        brokenParser = new BrokenParser();
-        brokenParser.re= new WontBeSerializedError("Can't Serialize");
-        parser = new ForkParser(ForkParser.class.getClassLoader(), brokenParser);
-//        try {
-//           ContentHandler output = new BodyContentHandler();
-//           ParseContext context = new ParseContext();
-//           parser.parse(stream, output, new Metadata(), context);
-//           fail("Expected TikaException caused by Error");
-//       } catch (TikaException e) {
-//           assertEquals(TikaException.class, e.getCause().getClass());
-//           assertEquals("Bang!", e.getCause().getMessage());
-//       }
-    }
-    
-    /**
-     * If we supply a non serializable object on the ParseContext,
-     *  check we get a helpful exception back
-     */
-    @Test
-    public void testParserHandlingOfNonSerializable() throws Exception {
-       ForkParser parser = new ForkParser(
-             ForkParserIntegrationTest.class.getClassLoader(),
-             tika.getParser());
-       
-       ParseContext context = new ParseContext();
-       context.set(Detector.class, new Detector() {
-          public MediaType detect(InputStream input, Metadata metadata) {
-             return MediaType.OCTET_STREAM;
-          }
-       });
-
-       try {
-          ContentHandler output = new BodyContentHandler();
-          InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
-              "/test-documents/testTXT.txt");
-          parser.parse(stream, output, new Metadata(), context);
-          fail("Should have blown up with a non serializable ParseContext");
-       } catch(TikaException e) {
-          // Check the right details
-          assertNotNull(e.getCause());
-          assertEquals(NotSerializableException.class, e.getCause().getClass());
-          assertEquals("Unable to serialize ParseContext to pass to the Forked Parser", e.getMessage());
-       } finally {
-          parser.close();
-       }
-    }
-
-    /**
-     * TIKA-832
-     */
-    @Test
-    public void testAttachingADebuggerOnTheForkedParserShouldWork()
-            throws Exception {
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, tika.getParser());
-
-        ForkParser parser = new ForkParser(
-                ForkParserIntegrationTest.class.getClassLoader(),
-                tika.getParser());
-        parser.setJavaCommand(Arrays.asList("java", "-Xmx32m", "-Xdebug",
-                                            "-Xrunjdwp:transport=dt_socket,address=54321,server=y,suspend=n"));
-        try {
-            ContentHandler body = new BodyContentHandler();
-            InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
-                    "/test-documents/testTXT.txt");
-            parser.parse(stream, body, new Metadata(), context);
-            String content = body.toString();
-            assertContains("Test d'indexation", content);
-            assertContains("http://www.apache.org", content);
-        } finally {
-            parser.close();
-        }
-    }
-
-    /**
-     * TIKA-808 - Ensure that parsing of our test PDFs work under
-     * the Fork Parser, to ensure that complex parsing behaves
-     */
-    @Test
-    public void testForkedPDFParsing() throws Exception {
-        ForkParser parser = new ForkParser(
-                ForkParserIntegrationTest.class.getClassLoader(),
-                tika.getParser());
-        try {
-            ContentHandler output = new BodyContentHandler();
-            InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
-                    "/test-documents/testPDF.pdf");
-            ParseContext context = new ParseContext();
-            parser.parse(stream, output, new Metadata(), context);
-
-            String content = output.toString();
-            assertContains("Apache Tika", content);
-            assertContains("Tika - Content Analysis Toolkit", content);
-            assertContains("incubator", content);
-            assertContains("Apache Software Foundation", content);
-        } finally {
-            parser.close();
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
deleted file mode 100644
index d222e68..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
+++ /dev/null
@@ -1,251 +0,0 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.util.Date;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-
-/**
- * Somewhat bizarrely, we can't put the test of this test resource in tika-test-resources
- * or else it will be called by every module that uses it.  Um, Yossarian!!!
- */
-public class MockParserTest extends TikaTest {
-    private final static String M = "/test-documents/mock/";
-    private final static Parser PARSER = new AutoDetectParser();
-
-    @Override
-    public XMLResult getXML(String path, Metadata m) throws Exception {
-        //note that this is specific to MockParserTest with addition of M to the path!
-        InputStream is = getResourceAsStream(M+path);
-        try {
-            return super.getXML(is, PARSER, m);
-        } finally {
-            IOUtils.closeQuietly(is);
-        }
-    }
-
-    @Test
-    public void testExample() throws Exception {
-        Metadata m = new Metadata();
-        PrintStream out = System.out;
-        PrintStream err = System.err;
-        ByteArrayOutputStream outBos = new ByteArrayOutputStream();
-        ByteArrayOutputStream errBos = new ByteArrayOutputStream();
-        PrintStream tmpOut = new PrintStream(outBos, true, UTF_8.toString());
-        PrintStream tmpErr = new PrintStream(errBos, true, UTF_8.toString());
-        System.setOut(tmpOut);
-        System.setErr(tmpErr);
-        try {
-            assertThrowable("example.xml", m, IOException.class, "not another IOException");
-            assertMockParser(m);
-        } finally {
-            System.setOut(out);
-            System.setErr(err);
-        }
-        String outString = new String(outBos.toByteArray(), UTF_8);
-        assertContains("writing to System.out", outString);
-
-        String errString = new String(errBos.toByteArray(), UTF_8);
-        assertContains("writing to System.err", errString);
-
-    }
-
-    @Test
-    public void testNothingBad() throws Exception {
-        Metadata m = new Metadata();
-        String content = getXML("nothing_bad.xml", m).xml;
-        assertEquals("Geoffrey Chaucer", m.get("author"));
-        assertContains("<p>And bathed every veyne in swich licour,</p>", content);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testNullPointer() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("null_pointer.xml", m, NullPointerException.class, "another null pointer exception");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testNullPointerNoMsg() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("null_pointer_no_msg.xml", m, NullPointerException.class, null);
-        assertMockParser(m);
-    }
-
-
-    @Test
-    public void testSleep() throws Exception {
-        long start = new Date().getTime();
-        Metadata m = new Metadata();
-        String content = getXML("sleep.xml", m).xml;
-        assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
-        //should sleep for at least 3000
-        boolean enoughTimeHasElapsed = elapsed > 2000;
-        assertTrue("not enough time has not elapsed: "+elapsed, enoughTimeHasElapsed);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testHeavyHang() throws Exception {
-        long start = new Date().getTime();
-        Metadata m = new Metadata();
-
-        String content = getXML("heavy_hang.xml", m).xml;
-        assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
-        //should sleep for at least 3000
-        boolean enoughTimeHasElapsed = elapsed > 2000;
-        assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testFakeOOM() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not another oom");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testRealOOM() throws Exception {
-        //Note: we're not actually testing the diff between fake and real oom
-        //i.e. by creating child process and setting different -Xmx or
-        //memory profiling.
-        Metadata m = new Metadata();
-        assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap space");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testInterruptibleSleep() {
-        //Without static initialization of the parser, it can take ~1 second after t.start()
-        //before the parser actually calls parse.  This is
-        //just the time it takes to instantiate and call AutoDetectParser, do the detection, etc.
-        //This is not thread creation overhead.
-        ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
-        Thread t = new Thread(r);
-        t.start();
-        long start = new Date().getTime();
-        try {
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-
-        t.interrupt();
-
-        try {
-            t.join(10000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        long elapsed = new Date().getTime()-start;
-        boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
-        assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
-    }
-
-    @Test
-    public void testNonInterruptibleSleep() {
-        ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
-        Thread t = new Thread(r);
-        t.start();
-        long start = new Date().getTime();
-        try {
-            //make sure that the thread has actually started
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        t.interrupt();
-        try {
-            t.join(20000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        long elapsed = new Date().getTime()-start;
-        boolean longEnough = elapsed > 3000;//the xml file specifies 3000, this sleeps 1000
-        assertTrue("elapsed ("+elapsed+" millis) was not long enough", longEnough);
-    }
-
-    private class ParserRunnable implements Runnable {
-        private final String path;
-        ParserRunnable(String path) {
-            this.path = path;
-        }
-        @Override
-        public void run() {
-            Metadata m = new Metadata();
-            try {
-                getXML(path, m);
-            } catch (Exception e) {
-                throw new RuntimeException(e);
-            } finally {
-                assertMockParser(m);
-            }
-        }
-    }
-
-    private void assertThrowable(String path, Metadata m, Class<? extends Throwable> expected, String message) {
-
-        try {
-            getXML(path, m);
-        } catch (Throwable t) {
-            //if this is a throwable wrapped in a TikaException, use the cause
-            if (t instanceof TikaException && t.getCause() != null) {
-                t = t.getCause();
-            }
-            if (! (t.getClass().isAssignableFrom(expected))){
-                fail(t.getClass() +" is not assignable from "+expected);
-            }
-            if (message != null) {
-                assertEquals(message, t.getMessage());
-            }
-        }
-    }
-
-    private void assertMockParser(Metadata m) {
-        String[] parsers = m.getValues("X-Parsed-By");
-        //make sure that it was actually parsed by mock.
-        boolean parsedByMock = false;
-        for (String parser : parsers) {
-            if (parser.equals("org.apache.tika.parser.mock.MockParser")) {
-                parsedByMock = true;
-                break;
-            }
-        }
-        assertTrue("mock parser should have been called", parsedByMock);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
deleted file mode 100644
index 26d263b..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/PackageTest.java
+++ /dev/null
@@ -1,335 +0,0 @@
-package org.apache.tika.parser.pkg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Before;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class PackageTest extends TikaTest {
-
-    private static final MediaType TYPE_7ZIP = MediaType.application("x-7z-compressed");
-    
-    private ParseContext recursingContext;
-    private Parser autoDetectParser;
-    
-    @Before
-    public void setUp() throws Exception {
-       
-       autoDetectParser = new AutoDetectParser();
-       recursingContext = new ParseContext();
-       recursingContext.set(Parser.class, autoDetectParser);
-    }
-    
-    @Test
-    public void testZlibParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/testTXT.zlib")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/zlib", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("http://www.apache.org", content);
-    }
-    
-    
-    @Test
-    public void testArParsing() throws Exception {
-        Parser parser = new AutoDetectParser();
-
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/testARofText.ar")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-archive",
-                metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("http://www.apache.org", content);
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/testARofSND.ar")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-archive",
-                     metadata.get(Metadata.CONTENT_TYPE));
-        content = handler.toString();
-        assertContains("testAU.au", content);
-    }
-    
-    @Test
-    public void testBzip2Parsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tbz2")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void testCompressParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tar.Z");
-        try {
-            parser.parse(stream, handler, metadata, recursingContext);
-        } finally {
-            stream.close();
-        }
-
-        assertEquals("application/x-compress", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void testGzipParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tgz")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/gzip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void testRarParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.rar")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-rar-compressed", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void test7ZParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-        
-        // Ensure 7zip is a parsable format
-        assertTrue("No 7zip parser found", 
-                parser.getSupportedTypes(recursingContext).contains(TYPE_7ZIP));
-        
-        // Parse
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.7z")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    @Test
-    public void testTarParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.tar")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/x-tar", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("test-documents/testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("test-documents/testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("test-documents/testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("test-documents/testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("test-documents/testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("test-documents/testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("test-documents/testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("test-documents/testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("test-documents/testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void testZipParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/test-documents.zip")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/zip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("testEXCEL.xls", content);
-        assertContains("Sample Excel Worksheet", content);
-        assertContains("testHTML.html", content);
-        assertContains("Test Indexation Html", content);
-        assertContains("testOpenOffice2.odt", content);
-        assertContains("This is a sample Open Office document", content);
-        assertContains("testPDF.pdf", content);
-        assertContains("Apache Tika", content);
-        assertContains("testPPT.ppt", content);
-        assertContains("Sample Powerpoint Slide", content);
-        assertContains("testRTF.rtf", content);
-        assertContains("indexation Word", content);
-        assertContains("testTXT.txt", content);
-        assertContains("Test d'indexation de Txt", content);
-        assertContains("testWORD.doc", content);
-        assertContains("This is a sample Microsoft Word Document", content);
-        assertContains("testXML.xml", content);
-        assertContains("Rida Benjelloun", content);
-    }
-    
-    @Test
-    public void testSvgzParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = PackageTest.class.getResourceAsStream(
-                "/test-documents/testSVG.svgz")) {
-            parser.parse(stream, handler, metadata, recursingContext);
-        }
-
-        assertEquals("application/gzip", metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        assertContains("Test SVG image", content);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java b/tika-parsers/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
deleted file mode 100644
index d30759a..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/sax/PhoneExtractingContentHandlerTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.sax;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-
-import java.io.InputStream;
-
-import static org.apache.tika.TikaTest.assertContains;
-
-/**
- * Test class for the {@link org.apache.tika.sax.PhoneExtractingContentHandler}
- * class. This demonstrates how to parse a document and retrieve any phone numbers
- * found within.
- *
- * The phone numbers are added to a multivalued Metadata object under the key, "phonenumbers".
- * You can get an array of phone numbers by calling metadata.getValues("phonenumber").
- */
-public class PhoneExtractingContentHandlerTest {
-    @Test
-    public void testExtractPhoneNumbers() throws Exception {
-        Parser parser = new AutoDetectParser();
-        Metadata metadata = new Metadata();
-        // The PhoneExtractingContentHandler will examine any characters for phone numbers before passing them
-        // to the underlying Handler.
-        PhoneExtractingContentHandler handler = new PhoneExtractingContentHandler(new BodyContentHandler(), metadata);
-        try (InputStream stream = PhoneExtractingContentHandlerTest.class.getResourceAsStream("/test-documents/testPhoneNumberExtractor.odt")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-        String[] phoneNumbers = metadata.getValues("phonenumbers");
-        assertContains("9498888888", phoneNumbers[0]);
-        assertContains("9497777777", phoneNumbers[1]);
-        assertContains("9496666666", phoneNumbers[2]);
-        assertContains("9495555555", phoneNumbers[3]);
-        assertContains("4193404645", phoneNumbers[4]);
-        assertContains("9044687081", phoneNumbers[5]);
-        assertContains("2604094811", phoneNumbers[6]);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java b/tika-parsers/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
deleted file mode 100644
index 62660c8..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/utils/ServiceLoaderUtilsTest.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.utils;
-
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.parser.DefaultParser;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-
-public class ServiceLoaderUtilsTest extends TikaTest {
-    @Test
-    public void testOrdering() throws Exception {
-        //make sure that non Tika parsers come last
-        //which means that they'll overwrite Tika parsers and
-        //be preferred.
-        DefaultParser defaultParser = new DefaultParser();
-        int vorbisIndex = -1;
-        int fictIndex = -1;
-        int dcxmlIndex = -1;
-        int i = 0;
-        for (Parser p : defaultParser.getAllComponentParsers()) {
-            if ("class org.gagravarr.tika.VorbisParser".equals(p.getClass().toString())) {
-                vorbisIndex = i;
-            }
-            if ("class org.apache.tika.parser.xml.FictionBookParser".equals(p.getClass().toString())) {
-                fictIndex = i;
-            }
-            if ("class org.apache.tika.parser.xml.DcXMLParser".equals(p.getClass().toString())) {
-                dcxmlIndex = i;
-            }
-            i++;
-        }
-
-        assertNotEquals(vorbisIndex, fictIndex);
-        assertNotEquals(fictIndex, dcxmlIndex);
-        assertTrue(vorbisIndex > fictIndex);
-        assertTrue(fictIndex > dcxmlIndex);
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/pom.xml
----------------------------------------------------------------------
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index fa55b55..7d118cb 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -131,7 +131,13 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
-
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-test-resources</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
index 4804398..aafde60 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
@@ -39,7 +39,7 @@ import org.apache.cxf.rs.security.cors.CrossOriginResourceSharingFilter;
 import org.apache.tika.Tika;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.server.resource.DetectorResource;
 import org.apache.tika.server.resource.LanguageResource;
 import org.apache.tika.server.resource.MetadataResource;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index 9dab196..0eb3cc3 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -18,8 +18,6 @@
 package org.apache.tika.server;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -41,14 +39,15 @@ import org.apache.cxf.binding.BindingFactoryManager;
 import org.apache.cxf.endpoint.Server;
 import org.apache.cxf.jaxrs.JAXRSBindingFactory;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.server.resource.TikaResource;
 import org.apache.tika.server.resource.UnpackerResource;
 import org.junit.After;
 import org.junit.Before;
 
-public abstract class CXFTestBase {
+public abstract class CXFTestBase extends TikaTest {
     private final static int DIGESTER_READ_LIMIT = 20*1024*1024;
 
     protected static final String endPoint =
@@ -56,13 +55,6 @@ public abstract class CXFTestBase {
     protected Server server;
     private TikaConfig tika;
 
-    public static void assertContains(String needle, String haystack) {
-        assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
-    }
-
-    public static void assertNotFound(String needle, String haystack) {
-        assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
-    }
 
     protected static InputStream copy(InputStream in, int remaining) throws IOException {
         ByteArrayOutputStream out = new ByteArrayOutputStream();

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
index 3d4dc1f..2ec2682 100644
--- a/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
@@ -68,7 +68,7 @@ public class DetectorResourceTest extends CXFTestBase {
                 .accept("*/*")
                 .header("Content-Disposition",
                         "attachment; filename=" + FOO_CSV)
-                .put(ClassLoader.getSystemResourceAsStream(FOO_CSV));
+                .put(getTestDocumentAsStream(FOO_CSV));
         assertNotNull(response);
         String readMime = getStringFromInputStream((InputStream) response
                 .getEntity());
@@ -85,7 +85,7 @@ public class DetectorResourceTest extends CXFTestBase {
                 .accept("*/*")
                 .header("Content-Disposition",
                         "attachment; filename=" + CDEC_CSV_NO_EXT)
-                .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT));
+                .put(getTestDocumentAsStream(CDEC_CSV_NO_EXT));
         assertNotNull(response);
         String readMime = getStringFromInputStream((InputStream) response
                 .getEntity());
@@ -98,7 +98,7 @@ public class DetectorResourceTest extends CXFTestBase {
                 .accept("*/*")
                 .header("Content-Disposition",
                         "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv")
-                .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT));
+                .put(getTestDocumentAsStream(CDEC_CSV_NO_EXT));
         assertNotNull(response);
         readMime = getStringFromInputStream((InputStream) response.getEntity());
         assertEquals("text/csv", readMime);

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
index c3ca475..c2c4397 100644
--- a/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
@@ -87,7 +87,7 @@ public class LanguageResourceTest extends CXFTestBase {
 		String url = endPoint + LANG_STREAM_PATH;
 		Response response = WebClient.create(url).type("text/plain")
 				.accept("text/plain")
-				.put(ClassLoader.getSystemResourceAsStream("english.txt"));
+				.put(getTestDocumentAsStream("english.txt"));
 		assertNotNull(response);
 		String readLang = getStringFromInputStream((InputStream) response
 				.getEntity());
@@ -99,7 +99,7 @@ public class LanguageResourceTest extends CXFTestBase {
 		String url = endPoint + LANG_STREAM_PATH;
 		Response response = WebClient.create(url).type("text/plain")
 				.accept("text/plain")
-				.put(ClassLoader.getSystemResourceAsStream("french.txt"));
+				.put(getTestDocumentAsStream("french.txt"));
 		assertNotNull(response);
 		String readLang = getStringFromInputStream((InputStream) response
 				.getEntity());

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
index 7cd5f1d..5e4c0d0 100644
--- a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
@@ -73,8 +73,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .create(endPoint + META_PATH)
                 .type("application/msword")
                 .accept("text/csv")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_DOC));
 
         Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
 
@@ -100,8 +99,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .create(endPoint + META_PATH)
                 .type("application/vnd.ms-excel")
                 .accept("text/csv")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
 
         // Won't work, no password given
         assertEquals(500, response.getStatus());
@@ -112,7 +110,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .type("application/vnd.ms-excel")
                 .accept("text/csv")
                 .header("Password", "wrong password")
-                .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
 
         assertEquals(500, response.getStatus());
 
@@ -122,7 +120,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .type("application/vnd.ms-excel")
                 .accept("text/csv")
                 .header("Password", "password")
-                .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
 
         // Will work
         assertEquals(200, response.getStatus());
@@ -149,8 +147,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .create(endPoint + META_PATH)
                 .type("application/msword")
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_DOC));
 
         Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
 
@@ -165,8 +162,7 @@ public class MetadataResourceTest extends CXFTestBase {
                 .create(endPoint + META_PATH)
                 .type("application/msword")
                 .accept("application/rdf+xml")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_DOC));
 
         String result = IOUtils.readStringFromStream((InputStream) response.getEntity());
         assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
@@ -176,14 +172,14 @@ public class MetadataResourceTest extends CXFTestBase {
     @Test
     public void testGetField_XXX_NotFound() throws Exception {
         Response response = WebClient.create(endPoint + META_PATH + "/xxx").type("application/msword")
-                .accept(MediaType.APPLICATION_JSON).put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+                .accept(MediaType.APPLICATION_JSON).put(getTestDocumentAsStream(TikaResourceTest.TEST_DOC));
         Assert.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), response.getStatus());
     }
 
     @Test
     public void testGetField_Author_TEXT_Partial_BAD_REQUEST() throws Exception {
 
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
                 .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));
@@ -193,7 +189,7 @@ public class MetadataResourceTest extends CXFTestBase {
     @Test
     public void testGetField_Author_TEXT_Partial_Found() throws Exception {
 
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
                 .accept(MediaType.TEXT_PLAIN).put(copy(stream, 12000));
@@ -205,7 +201,7 @@ public class MetadataResourceTest extends CXFTestBase {
     @Test
     public void testGetField_Author_JSON_Partial_Found() throws Exception {
 
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
                 .accept(MediaType.APPLICATION_JSON).put(copy(stream, 12000));
@@ -219,7 +215,7 @@ public class MetadataResourceTest extends CXFTestBase {
     @Test
     public void testGetField_Author_XMP_Partial_Found() throws Exception {
 
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + META_PATH + "/dc:creator").type("application/msword")
                 .accept("application/rdf+xml").put(copy(stream, 12000));

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
index 56910a9..9d41ff1 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
@@ -73,8 +73,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         Response response = WebClient
                 .create(endPoint + META_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
 
         Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
@@ -92,8 +91,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
                 .create(endPoint + META_PATH)
                 .type("application/vnd.ms-excel")
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
 
         // Won't work, no password given
         assertEquals(500, response.getStatus());
@@ -104,7 +102,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
                 .type("application/vnd.ms-excel")
                 .accept("application/json")
                 .header("Password", "password")
-                .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
+                .put(getTestDocumentAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
 
         // Will work
         assertEquals(200, response.getStatus());
@@ -122,8 +120,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         Response response = WebClient
                 .create(endPoint+META_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
 
         Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
@@ -135,8 +132,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         response = WebClient
                 .create(endPoint + META_PATH + SLASH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         metadataList = JsonMetadataList.fromJson(reader);
         assertEquals(12, metadataList.size());
@@ -147,8 +143,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         response = WebClient
                 .create(endPoint + META_PATH + UNPARSEABLE_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         metadataList = JsonMetadataList.fromJson(reader);
         assertEquals(12, metadataList.size());
@@ -159,8 +154,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         response = WebClient
                 .create(endPoint + META_PATH + XML_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         metadataList = JsonMetadataList.fromJson(reader);
         assertEquals(12, metadataList.size());
@@ -171,8 +165,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         response = WebClient
                 .create(endPoint + META_PATH + TEXT_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         metadataList = JsonMetadataList.fromJson(reader);
         assertEquals(12, metadataList.size());
@@ -183,8 +176,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         response = WebClient
                 .create(endPoint + META_PATH + IGNORE_PATH)
                 .accept("application/json")
-                .put(ClassLoader
-                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
         metadataList = JsonMetadataList.fromJson(reader);
         assertEquals(12, metadataList.size());
@@ -198,7 +190,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         Attachment attachmentPart =
                 new Attachment("myworddocx",
                         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                        ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                        getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         WebClient webClient = WebClient.create(endPoint + META_PATH + FORM_PATH);
 
         Response response = webClient.type("multipart/form-data")
@@ -214,7 +206,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         attachmentPart =
                 new Attachment("myworddocx",
                         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                        ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                        getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         webClient = WebClient.create(endPoint + META_PATH + FORM_PATH + UNPARSEABLE_PATH);
 
         response = webClient.type("multipart/form-data")
@@ -230,7 +222,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         attachmentPart =
                 new Attachment("myworddocx",
                         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                        ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                        getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         webClient = WebClient.create(endPoint + META_PATH + FORM_PATH + XML_PATH);
 
         response = webClient.type("multipart/form-data")
@@ -246,7 +238,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         attachmentPart =
                 new Attachment("myworddocx",
                         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                        ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                        getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         webClient = WebClient.create(endPoint + META_PATH + FORM_PATH+TEXT_PATH);
 
         response = webClient.type("multipart/form-data")
@@ -262,7 +254,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         attachmentPart =
                 new Attachment("myworddocx",
                         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                        ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                        getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         webClient = WebClient.create(endPoint + META_PATH +FORM_PATH+IGNORE_PATH);
 
         response = webClient.type("multipart/form-data")

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java b/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
index bd5fefe..065cf52 100644
--- a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
@@ -90,7 +90,7 @@ public class StackTraceOffTest extends CXFTestBase {
                     .accept("*/*")
                     .header("Content-Disposition",
                             "attachment; filename=" + TEST_PASSWORD_PROTECTED)
-                    .put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED));
+                    .put(getTestDocumentAsStream(TEST_PASSWORD_PROTECTED));
             assertNotNull("null response: " + path, response);
             assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus());
             String msg = getStringFromInputStream((InputStream) response
@@ -105,7 +105,7 @@ public class StackTraceOffTest extends CXFTestBase {
             Response response = WebClient
                     .create(endPoint + path)
                     .accept("*/*")
-                    .put(ClassLoader.getSystemResourceAsStream(TEST_NULL));
+                    .put(getTestDocumentAsStream(TEST_NULL));
             assertNotNull("null response: " + path, response);
             assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus());
             String msg = getStringFromInputStream((InputStream) response
@@ -124,7 +124,7 @@ public class StackTraceOffTest extends CXFTestBase {
                     .accept("*/*")
                     .header("Content-Disposition",
                             "attachment; filename=null_pointer.evil")
-                    .put(ClassLoader.getSystemResourceAsStream(TEST_NULL));
+                    .put(getTestDocumentAsStream(TEST_NULL));
             assertNotNull("null response: " + path, response);
             assertEquals("bad type: " + path, 415, response.getStatus());
             String msg = getStringFromInputStream((InputStream) response
@@ -139,7 +139,7 @@ public class StackTraceOffTest extends CXFTestBase {
     //exceptions as the others...
     @Test
     public void testMeta() throws Exception {
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword")
                 .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
index 410824a..a45d1eb 100644
--- a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
@@ -84,7 +84,7 @@ public class StackTraceTest extends CXFTestBase {
                     .accept("*/*")
                     .header("Content-Disposition",
                             "attachment; filename=" + TEST_PASSWORD_PROTECTED)
-                    .put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED));
+                    .put(getTestDocumentAsStream(TEST_PASSWORD_PROTECTED));
             assertNotNull("null response: " + path, response);
             assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus());
             String msg = getStringFromInputStream((InputStream) response
@@ -100,12 +100,12 @@ public class StackTraceTest extends CXFTestBase {
             Response response = WebClient
                     .create(endPoint + path)
                     .accept("*/*")
-                    .put(ClassLoader.getSystemResourceAsStream(TEST_NULL));
+                    .put(getTestDocumentAsStream(TEST_NULL));
             assertNotNull("null response: " + path, response);
             assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus());
             String msg = getStringFromInputStream((InputStream) response
                     .getEntity());
-            assertContains("Caused by: java.lang.NullPointerException: null pointer message",
+            assertContains("Caused by: java.lang.NullPointerException: another null pointer exception",
                     msg);
         }
     }
@@ -135,7 +135,7 @@ public class StackTraceTest extends CXFTestBase {
     //exceptions as the others...
     @Test
     public void testMeta() throws Exception {
-        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        InputStream stream = getTestDocumentAsStream(TikaResourceTest.TEST_DOC);
 
         Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword")
                 .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
index e4e60a5..5e5c735 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
@@ -79,9 +79,9 @@ public class TikaParsersTest extends CXFTestBase {
                 assertContains("audio/ogg", text);
             } else {
                 // Shouldn't do
-                assertNotFound("text/plain", text);
-                assertNotFound("application/pdf", text);
-                assertNotFound("audio/ogg", text);
+                assertNotContained("text/plain", text);
+                assertNotContained("application/pdf", text);
+                assertNotContained("audio/ogg", text);
             }
         }
     }
@@ -114,9 +114,9 @@ public class TikaParsersTest extends CXFTestBase {
                 assertContains("<li>audio/ogg", text);
             } else {
                 // Shouldn't do
-                assertNotFound("text/plain", text);
-                assertNotFound("application/pdf", text);
-                assertNotFound("audio/ogg", text);
+                assertNotContained("text/plain", text);
+                assertNotContained("application/pdf", text);
+                assertNotContained("audio/ogg", text);
             }
         }
     }


[11/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
new file mode 100644
index 0000000..b852de0
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -0,0 +1,1044 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+// Junit imports
+
+import static java.nio.charset.StandardCharsets.UTF_16BE;
+import static java.nio.charset.StandardCharsets.UTF_16LE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import org.apache.tika.Tika;
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * 
+ * Test Suite for the {@link MimeTypes} repository.
+ * 
+ */
+public class TestMimeTypes extends TikaTest {
+
+    private Tika tika;
+
+    private MimeTypes repo;
+
+    private URL u;
+
+    private static final File f = new File("/a/b/c/x.pdf");
+
+    @Before
+    public void setUp() throws Exception{
+        TikaConfig config = TikaConfig.getDefaultConfig();
+        repo = config.getMimeRepository();
+        tika = new Tika(config);
+        u = new URL("http://mydomain.com/x.pdf?x=y");
+    }
+
+    @Test
+    public void testCaseSensitivity() {
+        String type = tika.detect("test.PDF");
+        assertNotNull(type);
+        assertEquals(type, tika.detect("test.pdf"));
+        assertEquals(type, tika.detect("test.PdF"));
+        assertEquals(type, tika.detect("test.pdF"));
+    }
+
+    @Test
+    public void testLoadMimeTypes() throws MimeTypeException {
+        assertNotNull(repo.forName("application/octet-stream"));
+        assertNotNull(repo.forName("text/x-tex"));
+    }
+
+    /**
+     * Tests MIME type determination based solely on the URL's extension.
+     */
+    @Test
+    public void testGuessMimeTypes() throws Exception {
+        assertTypeByName("application/pdf", "x.pdf");
+        assertEquals("application/pdf", tika.detect(u.toExternalForm()));
+        assertEquals("application/pdf", tika.detect(f.getPath()));
+        assertTypeByName("text/plain", "x.txt");
+        assertTypeByName("text/html", "x.htm");
+        assertTypeByName("text/html", "x.html");
+        assertTypeByName("application/xhtml+xml", "x.xhtml");
+        assertTypeByName("application/xml", "x.xml");
+        assertTypeByName("application/zip", "x.zip");
+        assertTypeByName("application/vnd.oasis.opendocument.text", "x.odt");
+        assertTypeByName("application/octet-stream", "x.unknown");
+
+        // Test for the MS Office media types and file extensions listed in
+        // http://blogs.msdn.com/vsofficedeveloper/pages/Office-2007-Open-XML-MIME-Types.aspx
+        assertTypeByName("application/msword", "x.doc");
+        assertTypeByName("application/msword", "x.dot");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "x.docx");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.template", "x.dotx");
+        assertTypeByName("application/vnd.ms-word.document.macroenabled.12", "x.docm");
+        assertTypeByName("application/vnd.ms-word.template.macroenabled.12", "x.dotm");
+        assertTypeByName("application/vnd.ms-excel", "x.xls");
+        assertTypeByName("application/vnd.ms-excel", "x.xlt");
+        assertTypeByName("application/vnd.ms-excel", "x.xla");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "x.xlsx");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.template", "x.xltx");
+        assertTypeByName("application/vnd.ms-excel.sheet.macroenabled.12", "x.xlsm");
+        assertTypeByName("application/vnd.ms-excel.template.macroenabled.12", "x.xltm");
+        assertTypeByName("application/vnd.ms-excel.addin.macroenabled.12", "x.xlam");
+        assertTypeByName("application/vnd.ms-excel.sheet.binary.macroenabled.12", "x.xlsb");
+        assertTypeByName("application/vnd.ms-powerpoint", "x.ppt");
+        assertTypeByName("application/vnd.ms-powerpoint", "x.pot");
+        assertTypeByName("application/vnd.ms-powerpoint", "x.pps");
+        assertTypeByName("application/vnd.ms-powerpoint", "x.ppa");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.presentation", "x.pptx");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.template", "x.potx");
+        assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.slideshow", "x.ppsx");
+        assertTypeByName("application/vnd.ms-powerpoint.addin.macroenabled.12", "x.ppam");
+        assertTypeByName("application/vnd.ms-powerpoint.presentation.macroenabled.12", "x.pptm");
+        assertTypeByName("application/vnd.ms-powerpoint.template.macroenabled.12", "x.potm");
+        assertTypeByName("application/vnd.ms-powerpoint.slideshow.macroenabled.12", "x.ppsm");
+    }
+
+    /**
+     * Note - detecting container formats by mime magic is very very
+     *  iffy, as we can't be sure where things will end up.
+     * People really ought to use the container aware detection...
+     */
+    @Test
+    public void testOLE2Detection() throws Exception {
+        // These have the properties block near the start, so our mime
+        //  magic will spot them
+        assertTypeByData("application/vnd.ms-excel", "testEXCEL.xls");
+        
+        // This one quite legitimately doesn't have its properties block
+        //  as one of the first couple of entries
+        // As such, our mime magic can't figure it out...
+        assertTypeByData("application/x-tika-msoffice", "testWORD.doc");
+        assertTypeByData("application/x-tika-msoffice", "testPPT.ppt");
+        
+        
+        // By name + data:
+        
+        // Those we got right to start with are fine
+        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL.xls");
+        
+        // And the name lets us specialise the generic OLE2
+        //  ones to their actual type
+        assertTypeByNameAndData("application/vnd.ms-powerpoint", "testPPT.ppt");
+        assertTypeByNameAndData("application/msword", "testWORD.doc");
+    }
+    
+    /**
+     * Files generated by Works 7.0 Spreadsheet application use the OLE2
+     * structure and resemble Excel files (they contain a "Workbook"). They are
+     * not Excel though. They are distinguished from Excel files by an
+     * additional top-level entry below the root of the POI filesystem.
+     * 
+     * @throws Exception
+     */
+    @Test
+    public void testWorksSpreadsheetDetection() throws Exception {
+        assertTypeDetection("testWORKSSpreadsheet7.0.xlr",
+                // with name-only, everything should be all right 
+                "application/x-tika-msworks-spreadsheet",
+                // this is possible due to MimeTypes guessing the type
+                // based on the WksSSWorkBook near the beginning of the
+                // file
+                "application/x-tika-msworks-spreadsheet",
+                // this is right, the magic-based detection works, there is
+                // no need for the name-based detection to refine it
+                "application/x-tika-msworks-spreadsheet");
+    }
+    
+    @Test
+    public void testStarOfficeDetection() throws Exception {
+        assertTypeDetection("testVORCalcTemplate.vor",
+                "application/x-staroffice-template",
+                "application/vnd.stardivision.calc",
+                "application/vnd.stardivision.calc");
+        assertTypeDetection("testVORDrawTemplate.vor",
+                "application/x-staroffice-template",
+                "application/vnd.stardivision.draw",
+                "application/vnd.stardivision.draw");
+        assertTypeDetection("testVORImpressTemplate.vor",
+                "application/x-staroffice-template",
+                "application/vnd.stardivision.impress",
+                "application/vnd.stardivision.impress");
+        assertTypeDetection("testVORWriterTemplate.vor",
+                "application/x-staroffice-template",
+                "application/vnd.stardivision.writer",
+                "application/vnd.stardivision.writer");
+        
+        assertTypeDetection("testStarOffice-5.2-calc.sdc",
+                "application/vnd.stardivision.calc",
+                "application/vnd.stardivision.calc",
+                "application/vnd.stardivision.calc");
+        assertTypeDetection("testStarOffice-5.2-draw.sda",
+                "application/vnd.stardivision.draw",
+                "application/vnd.stardivision.draw",
+                "application/vnd.stardivision.draw");
+        assertTypeDetection("testStarOffice-5.2-impress.sdd",
+                "application/vnd.stardivision.impress",
+                "application/vnd.stardivision.impress",
+                "application/vnd.stardivision.impress");
+        assertTypeDetection("testStarOffice-5.2-writer.sdw",
+                "application/vnd.stardivision.writer",
+                "application/vnd.stardivision.writer",
+                "application/vnd.stardivision.writer");
+    }
+    
+    /**
+     * Files generated by Works Word Processor versions 3.0 and 4.0 use the
+     * OLE2 structure. They don't resemble Word though.
+     * 
+     * @throws Exception
+     */
+    @Test
+    public void testOldWorksWordProcessorDetection() throws Exception {
+        assertTypeDetection(
+                "testWORKSWordProcessor3.0.wps",
+                // .wps is just like any other works extension
+                "application/vnd.ms-works",
+                // this is due to MatOST substring
+                "application/vnd.ms-works",
+                // magic-based detection works, no need to refine it
+                "application/vnd.ms-works");
+        
+        // files in version 4.0 are no different from those in version 3.0
+        assertTypeDetection(
+                "testWORKSWordProcessor4.0.wps",
+                "application/vnd.ms-works",
+                "application/vnd.ms-works",
+                "application/vnd.ms-works");
+    }
+    
+    /**
+     * Files from Excel 2 through 4 are based on the BIFF record
+     *  structure, but without a wrapping OLE2 structure.
+     * Excel 5 and Excel 95+ work on OLE2
+     */
+    @Test
+    public void testOldExcel() throws Exception {
+        // With just a name, we'll think everything's a new Excel file
+        assertTypeByName("application/vnd.ms-excel","testEXCEL_4.xls");
+        assertTypeByName("application/vnd.ms-excel","testEXCEL_5.xls");
+        assertTypeByName("application/vnd.ms-excel","testEXCEL_95.xls");
+        
+        // With data, we can work out if it's old or new style
+        assertTypeByData("application/vnd.ms-excel.sheet.4","testEXCEL_4.xls");
+        assertTypeByData("application/x-tika-msoffice","testEXCEL_5.xls");
+        assertTypeByData("application/x-tika-msoffice","testEXCEL_95.xls");
+        
+        assertTypeByNameAndData("application/vnd.ms-excel.sheet.4","testEXCEL_4.xls");
+        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL_5.xls");
+        assertTypeByNameAndData("application/vnd.ms-excel","testEXCEL_95.xls");
+    }
+    
+    /**
+     * Note - detecting container formats by mime magic is very very
+     *  iffy, as we can't be sure where things will end up.
+     * People really ought to use the container aware detection...
+     */
+    @Test
+    public void testOoxmlDetection() throws Exception {
+        // These two do luckily have [Content_Types].xml near the start,
+        //  so our mime magic will spot them
+        assertTypeByData("application/x-tika-ooxml", "testEXCEL.xlsx");
+        assertTypeByData("application/x-tika-ooxml", "testPPT.pptx");
+        
+        // This one quite legitimately doesn't have its [Content_Types].xml
+        //  file as one of the first couple of entries
+        // As such, our mime magic can't figure it out...
+        assertTypeByData("application/zip", "testWORD.docx");
+        
+        // If we give the filename as well as the data, we can
+        //  specialise the ooxml generic one to the correct type
+        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "testEXCEL.xlsx");
+        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.presentationml.presentation", "testPPT.pptx");
+        assertTypeByNameAndData("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "testWORD.docx");
+        
+        // Test a few of the less usual ones
+        assertTypeByNameAndData("application/vnd.ms-excel.sheet.binary.macroenabled.12","testEXCEL.xlsb");
+        assertTypeByNameAndData("application/vnd.ms-powerpoint.presentation.macroenabled.12", "testPPT.pptm");
+        assertTypeByNameAndData("application/vnd.ms-powerpoint.template.macroenabled.12", "testPPT.potm");
+        assertTypeByNameAndData("application/vnd.ms-powerpoint.slideshow.macroenabled.12", "testPPT.ppsm");
+    }
+    
+    /**
+     * Note - container based formats, needs container detection
+     *  to be properly correct
+     */
+    @Test
+    public void testVisioDetection() throws Exception {
+        // By Name, should get it right
+        assertTypeByName("application/vnd.visio", "testVISIO.vsd");
+        assertTypeByName("application/vnd.ms-visio.drawing.macroenabled.12", "testVISIO.vsdm");
+        assertTypeByName("application/vnd.ms-visio.drawing", "testVISIO.vsdx");
+        assertTypeByName("application/vnd.ms-visio.stencil.macroenabled.12", "testVISIO.vssm");
+        assertTypeByName("application/vnd.ms-visio.stencil", "testVISIO.vssx");
+        assertTypeByName("application/vnd.ms-visio.template.macroenabled.12", "testVISIO.vstm");
+        assertTypeByName("application/vnd.ms-visio.template", "testVISIO.vstx");
+        
+        // By Name and Data, should get it right
+        assertTypeByNameAndData("application/vnd.visio", "testVISIO.vsd");
+        assertTypeByNameAndData("application/vnd.ms-visio.drawing.macroenabled.12", "testVISIO.vsdm");
+        assertTypeByNameAndData("application/vnd.ms-visio.drawing", "testVISIO.vsdx");
+        assertTypeByNameAndData("application/vnd.ms-visio.stencil.macroenabled.12", "testVISIO.vssm");
+        assertTypeByNameAndData("application/vnd.ms-visio.stencil", "testVISIO.vssx");
+        assertTypeByNameAndData("application/vnd.ms-visio.template.macroenabled.12", "testVISIO.vstm");
+        assertTypeByNameAndData("application/vnd.ms-visio.template", "testVISIO.vstx");
+        
+        // By Data only, will get the container parent
+        assertTypeByData("application/x-tika-msoffice", "testVISIO.vsd");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vsdm");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vsdx");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vssm");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vssx");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vstm");
+        assertTypeByData("application/x-tika-ooxml", "testVISIO.vstx");
+    }
+
+    /**
+     * Note - detecting container formats by mime magic is very very
+     *  iffy, as we can't be sure where things will end up.
+     * People really ought to use the container aware detection...
+     */
+    @Test
+    public void testIWorkDetection() throws Exception {
+        // By name is easy
+       assertTypeByName("application/vnd.apple.keynote", "testKeynote.key");
+       assertTypeByName("application/vnd.apple.numbers", "testNumbers.numbers");
+       assertTypeByName("application/vnd.apple.pages", "testPages.pages");
+       
+       // We can't do it by data, as we'd need to unpack
+       //  the zip file to check the XML 
+       assertTypeByData("application/zip", "testKeynote.key");
+       
+       assertTypeByNameAndData("application/vnd.apple.keynote", "testKeynote.key");
+       assertTypeByNameAndData("application/vnd.apple.numbers", "testNumbers.numbers");
+       assertTypeByNameAndData("application/vnd.apple.pages", "testPages.pages");
+    }
+    
+    @Test
+    public void testArchiveDetection() throws Exception {
+       assertTypeByName("application/x-archive", "test.ar");
+       assertTypeByName("application/zip",    "test.zip");
+       assertTypeByName("application/x-tar",  "test.tar");
+       assertTypeByName("application/gzip", "test.tgz"); // See GZIP, not tar contents of it
+       assertTypeByName("application/x-cpio", "test.cpio");
+       
+       // TODO Add an example .deb and .udeb, then check these
+       
+       // Check the mime magic patterns for them work too
+       assertTypeByData("application/x-archive", "testARofText.ar");
+       assertTypeByData("application/x-archive", "testARofSND.ar"); 
+       assertTypeByData("application/zip",    "test-documents.zip");
+       assertTypeByData("application/x-gtar",  "test-documents.tar"); // GNU TAR
+       assertTypeByData("application/gzip", "test-documents.tgz"); // See GZIP, not tar contents of it
+       assertTypeByData("application/x-cpio", "test-documents.cpio");
+       
+       // For spanned zip files, the .zip file doesn't have the header, it's the other parts
+       assertTypeByData("application/octet-stream", "test-documents-spanned.zip");
+       assertTypeByData("application/zip",          "test-documents-spanned.z01");
+    }
+    
+    @Test
+    public void testFeedsDetection() throws Exception {
+        assertType("application/rss+xml",  "rsstest.rss");
+        assertType("application/atom+xml", "testATOM.atom");
+        assertTypeByData("application/rss+xml",  "rsstest.rss");
+        assertTypeByName("application/rss+xml",  "rsstest.rss");
+        assertTypeByData("application/atom+xml", "testATOM.atom");
+        assertTypeByName("application/atom+xml", "testATOM.atom");
+    }
+    
+    @Test
+    public void testFitsDetection() throws Exception {
+        // FITS image created using imagemagick convert of testJPEG.jpg
+        assertType("application/fits", "testFITS.fits");
+        assertTypeByData("application/fits", "testFITS.fits");
+        assertTypeByName("application/fits", "testFITS.fits");
+    }
+
+    @Test
+    public void testJpegDetection() throws Exception {
+        assertType("image/jpeg", "testJPEG.jpg");
+        assertTypeByData("image/jpeg", "testJPEG.jpg");
+        assertTypeByName("image/jpeg", "x.jpg");
+        assertTypeByName("image/jpeg", "x.JPG");
+        assertTypeByName("image/jpeg", "x.jpeg");
+        assertTypeByName("image/jpeg", "x.JPEG");
+        assertTypeByName("image/jpeg", "x.jpe");
+        assertTypeByName("image/jpeg", "x.jif");
+        assertTypeByName("image/jpeg", "x.jfif");
+        assertTypeByName("image/jpeg", "x.jfi");
+        
+        assertType("image/jp2", "testJPEG.jp2");
+        assertTypeByData("image/jp2", "testJPEG.jp2");
+        assertTypeByName("image/jp2", "x.jp2");
+    }
+
+    @Test
+    public void testBpgDetection() throws Exception {
+        assertType("image/x-bpg", "testBPG.bpg");
+        assertTypeByData("image/x-bpg", "testBPG.bpg");
+        assertTypeByData("image/x-bpg", "testBPG_commented.bpg");
+        assertTypeByName("image/x-bpg", "x.bpg");
+    }
+    
+    @Test
+    public void testTiffDetection() throws Exception {
+        assertType("image/tiff", "testTIFF.tif");
+        assertTypeByData("image/tiff", "testTIFF.tif");
+        assertTypeByName("image/tiff", "x.tiff");
+        assertTypeByName("image/tiff", "x.tif");
+        assertTypeByName("image/tiff", "x.TIF");
+    }
+
+    @Test
+    public void testGifDetection() throws Exception {
+        assertType("image/gif", "testGIF.gif");
+        assertTypeByData("image/gif", "testGIF.gif");
+        assertTypeByName("image/gif", "x.gif");
+        assertTypeByName("image/gif", "x.GIF");
+    }
+
+    @Test
+    public void testPngDetection() throws Exception {
+        assertType("image/png", "testPNG.png");
+        assertTypeByData("image/png", "testPNG.png");
+        assertTypeByName("image/png", "x.png");
+        assertTypeByName("image/png", "x.PNG");
+    }
+
+    @Test
+    public void testWEBPDetection() throws Exception {
+        assertType("image/webp", "testWEBP.webp");
+        assertTypeByData("image/webp", "testWEBP.webp");
+        assertTypeByName("image/webp", "x.webp");
+        assertTypeByName("image/webp", "x.WEBP");
+    }
+
+    @Test
+    public void testBmpDetection() throws Exception {
+        assertType("image/x-ms-bmp", "testBMP.bmp");
+        assertTypeByData("image/x-ms-bmp", "testBMP.bmp");
+        assertTypeByName("image/x-ms-bmp", "x.bmp");
+        assertTypeByName("image/x-ms-bmp", "x.BMP");
+        assertTypeByName("image/x-ms-bmp", "x.dib");
+        assertTypeByName("image/x-ms-bmp", "x.DIB");
+        //false positive check -- contains part of BMP signature
+        assertType("text/plain", "testBMPfp.txt");
+    }
+
+    @Test
+    public void testPnmDetection() throws Exception {
+        assertType("image/x-portable-bitmap", "testPBM.pbm");
+        assertType("image/x-portable-graymap", "testPGM.pgm");
+        assertType("image/x-portable-pixmap", "testPPM.ppm");
+        assertTypeByData("image/x-portable-bitmap", "testPBM.pbm");
+        assertTypeByData("image/x-portable-graymap", "testPGM.pgm");
+        assertTypeByData("image/x-portable-pixmap", "testPPM.ppm");
+        assertTypeByName("image/x-portable-anymap", "x.pnm");
+        assertTypeByName("image/x-portable-anymap", "x.PNM");
+        assertTypeByName("image/x-portable-bitmap", "x.pbm");
+        assertTypeByName("image/x-portable-bitmap", "x.PBM");
+        assertTypeByName("image/x-portable-graymap", "x.pgm");
+        assertTypeByName("image/x-portable-graymap", "x.PGM");
+        assertTypeByName("image/x-portable-pixmap", "x.ppm");
+        assertTypeByName("image/x-portable-pixmap", "x.PPM");
+    }
+
+    @Test
+    public void testPictDetection() throws Exception {
+        assertType("image/x-pict", "testPICT.pct");
+        assertTypeByData("image/x-pict", "testPICT.pct");
+        assertTypeByName("image/x-pict", "x.pic");
+        assertTypeByName("image/x-pict", "x.PCT");
+    }
+
+    @Test
+    public void testCgmDetection() throws Exception {
+        // TODO: Need a test image file
+        assertTypeByName("image/cgm", "x.cgm");
+        assertTypeByName("image/cgm", "x.CGM");
+    }
+
+    @Test
+    public void testRdfXmlDetection() throws Exception {
+        assertTypeByName("application/rdf+xml", "x.rdf");
+        assertTypeByName("application/rdf+xml", "x.owl");
+    }
+
+    /**
+     * SVG is recognised by name and by content. For the compressed .svgz
+     * sample the expected answer is gzip whenever the bytes are supplied,
+     * and SVG only when detection has just the name.
+     */
+    @Test
+    public void testSvgDetection() throws Exception {
+        final String svg = "image/svg+xml";
+        final String gzip = "application/gzip";
+
+        final String plainSample = "testSVG.svg";
+        assertType(svg, plainSample);
+        assertTypeByData(svg, plainSample);
+        assertTypeByName(svg, "x.svg");
+        assertTypeByName(svg, "x.SVG");
+
+        // Should *.svgz be svg or gzip
+        final String zippedSample = "testSVG.svgz";
+        assertType(gzip, zippedSample);
+        assertTypeByData(gzip, zippedSample);
+        assertTypeByName(svg, "x.svgz");
+        assertTypeByName(svg, "x.SVGZ");
+    }
+
+    /**
+     * PDF: the extension alone is sufficient, and both the regular sample
+     * and the sample with a leading byte order mark are detected with or
+     * without the file name.
+     */
+    @Test
+    public void testPdfDetection() throws Exception {
+        final String pdf = "application/pdf";
+
+        // Extension only, either case
+        assertTypeByName(pdf, "x.pdf");
+        assertTypeByName(pdf, "x.PDF");
+
+        // Regular sample first, then the one that starts with a BoM;
+        // each is checked with name + data and with data alone
+        for (String sample : new String[] {"testPDF.pdf", "testPDF_bom.pdf"}) {
+            assertType(pdf, sample);
+            assertTypeByData(pdf, sample);
+        }
+    }
+
+    /**
+     * Flash files are checked by file name only.
+     */
+    @Test
+    public void testSwfDetection() throws Exception {
+        final String flash = "application/x-shockwave-flash";
+        final String[] names = {
+            "x.swf", "x.SWF", "test1.swf", "test2.swf", "test3.swf"
+        };
+        for (String name : names) {
+            assertTypeByName(flash, name);
+        }
+    }
+
+    /**
+     * DWG: the extension maps by name, and the 2004, 2007 and 2010 sample
+     * files are each detected from their bytes alone.
+     */
+    @Test
+    public void testDwgDetection() throws Exception {
+        final String dwg = "image/vnd.dwg";
+        assertTypeByName(dwg, "x.dwg");
+
+        final String[] samples = {
+            "testDWG2004.dwg", "testDWG2007.dwg", "testDWG2010.dwg"
+        };
+        for (String sample : samples) {
+            assertTypeByData(dwg, sample);
+        }
+    }
+
+    /**
+     * PRT: the extension maps by name and the CADKEY sample is detected
+     * from its bytes.
+     */
+    @Test
+    public void testprtDetection() throws Exception {
+        final String prt = "application/x-prt";
+        assertTypeByName(prt, "x.prt");
+        assertTypeByData(prt, "testCADKEY.prt");
+    }
+    
+    /**
+     * Formats which are based on plain text
+     */
+    @Test
+    public void testTextBasedFormatsDetection() throws Exception {
+       assertTypeByName("text/plain", "testTXT.txt");
+       assertType(      "text/plain", "testTXT.txt");
+       
+       assertTypeByName("text/css", "testCSS.css");
+       assertType(      "text/css", "testCSS.css");
+       
+       assertTypeByName("text/csv", "testCSV.csv");
+       assertType(      "text/csv", "testCSV.csv");
+       
+       assertTypeByName("text/html", "testHTML.html");
+       assertType(      "text/html", "testHTML.html");
+       
+       assertTypeByName("application/javascript", "testJS.js");
+       assertType(      "application/javascript", "testJS.js");
+    }
+    
+    /**
+     * Java-related binaries. Only the JNI library sample is active; it must
+     * resolve to the same type by name, by data, and by both.
+     */
+    @Test
+    public void testJavaDetection() throws Exception {
+        // TODO Classloader doesn't seem to find the .class file in test-documents
+        //assertTypeDetection("AutoDetectParser.class", "application/java-vm");
+
+        // OSX Native Extension
+        final String jnilib = "application/x-java-jnilib";
+        assertTypeDetection("testJNILIB.jnilib", jnilib, jnilib, jnilib);
+    }
+
+    /**
+     * Content-only detection of small in-memory XML and HTML documents:
+     * XML with a declaration (UTF-8, and BOM-prefixed UTF-16 in both byte
+     * orders), XML that starts with a comment, and HTML with and without a
+     * leading comment.
+     */
+    @Test
+    public void testXmlAndHtmlDetection() throws Exception {
+        final String xml = "application/xml";
+        final String html = "text/html";
+
+        final String utf8Xml =
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?><records><record/></records>";
+        assertTypeByData(xml, utf8Xml.getBytes(UTF_8));
+
+        // Same document text, encoded little-endian and then big-endian
+        final String utf16Xml =
+                "\uFEFF<?xml version=\"1.0\" encoding=\"UTF-16\"?><records><record/></records>";
+        assertTypeByData(xml, utf16Xml.getBytes(UTF_16LE));
+        assertTypeByData(xml, utf16Xml.getBytes(UTF_16BE));
+
+        final String commentedXml =
+                "<!-- XML without processing instructions --><records><record/></records>";
+        assertTypeByData(xml, commentedXml.getBytes(UTF_8));
+
+        final String plainHtml = "<html><body>HTML</body></html>";
+        assertTypeByData(html, plainHtml.getBytes(UTF_8));
+
+        final String commentedHtml = "<!-- HTML comment --><html><body>HTML</body></html>";
+        assertTypeByData(html, commentedHtml.getBytes(UTF_8));
+    }
+
+    /**
+     * Windows metafiles: WMF and EMF by name and by sample data, and the
+     * compressed .wmz / .emz variants by name only.
+     */
+    @Test
+    public void testWmfDetection() throws Exception {
+        final String wmf = "application/x-msmetafile";
+        assertTypeByName(wmf, "x.wmf");
+        assertTypeByData(wmf, "testWMF.wmf");
+        assertTypeByName(wmf, "x.WMF");
+
+        final String emf = "application/x-emf";
+        assertTypeByName(emf, "x.emf");
+        assertTypeByData(emf, "testEMF.emf");
+        assertTypeByName(emf, "x.EMF");
+
+        // TODO: Need a test wmz file
+        final String wmz = "application/x-ms-wmz";
+        assertTypeByName(wmz, "x.wmz");
+        assertTypeByName(wmz, "x.WMZ");
+
+        // TODO: Need a test emz file
+        final String gzip = "application/gzip";
+        assertTypeByName(gzip, "x.emz");
+        assertTypeByName(gzip, "x.EMZ");
+    }
+
+    /**
+     * PostScript and its encapsulated variants, checked by file name only.
+     */
+    @Test
+    public void testPsDetection() throws Exception {
+        // TODO: Need a test postscript file
+        final String postscript = "application/postscript";
+        final String[] names = {"x.ps", "x.PS", "x.eps", "x.epsf", "x.epsi"};
+        for (String name : names) {
+            assertTypeByName(postscript, name);
+        }
+    }
+    
+    /**
+     * ASF, WMV and WMA: each type is checked by extension and then by the
+     * bytes of its sample file.
+     */
+    @Test
+    public void testMicrosoftMultiMediaDetection() throws Exception {
+        final String asf = "video/x-ms-asf";
+        final String wmv = "video/x-ms-wmv";
+        final String wma = "audio/x-ms-wma";
+
+        assertTypeByName(asf, "x.asf");
+        assertTypeByName(wmv, "x.wmv");
+        assertTypeByName(wma, "x.wma");
+
+        assertTypeByData(asf, "testASF.asf");
+        assertTypeByData(wmv, "testWMV.wmv");
+        assertTypeByData(wma, "testWMA.wma");
+    }
+    
+    /**
+     * All 3 DITA types are in theory handled by the same mimetype,
+     *  but we specialise them 
+     */
+    @Test
+    public void testDITADetection() throws Exception {
+       assertTypeByName("application/dita+xml; format=topic", "test.dita");
+       assertTypeByName("application/dita+xml; format=map", "test.ditamap");
+       assertTypeByName("application/dita+xml; format=val", "test.ditaval");
+       
+       assertTypeByData("application/dita+xml; format=task", "testDITA.dita");
+       assertTypeByData("application/dita+xml; format=concept", "testDITA2.dita");
+       assertTypeByData("application/dita+xml; format=map", "testDITA.ditamap");
+       
+       assertTypeByNameAndData("application/dita+xml; format=task", "testDITA.dita");
+       assertTypeByNameAndData("application/dita+xml; format=concept", "testDITA2.dita");
+       assertTypeByNameAndData("application/dita+xml; format=map", "testDITA.ditamap");
+       
+       // These are all children of the official type
+       assertEquals("application/dita+xml", 
+             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.ditamap")).toString());
+       assertEquals("application/dita+xml", 
+             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.dita")).toString());
+       // Concept inherits from topic
+       assertEquals("application/dita+xml; format=topic", 
+             repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA2.dita")).toString());
+    }
+
+    /**
+     * Registers the same filename pattern for two custom types, once with
+     * the addPattern flag set to true and once with it set to false, and
+     * checks that only the first registration makes the file name resolve.
+     * NOTE(review): the flag presumably selects Java-regex interpretation of
+     * the pattern -- confirm against MimeTypes.addPattern.
+     *
+     * @since TIKA-194
+     */
+    @Test
+    public void testJavaRegex() throws Exception {
+        MimeType testType = new MimeType(MediaType.parse("foo/bar"));
+        this.repo.add(testType);
+        assertNotNull(repo.forName("foo/bar"));
+        String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
+        this.repo.addPattern(testType, pattern, true);
+        String testFileName = "rtg_sst_grb_0.5.12345678";
+        assertEquals("foo/bar", tika.detect(testFileName));
+
+        MimeType testType2 = new MimeType(MediaType.parse("foo/bar2"));
+        this.repo.add(testType2);
+        assertNotNull(repo.forName("foo/bar2"));
+        this.repo.addPattern(testType2, pattern, false);
+        // Compare by value: an identity check (assertNotSame) can pass even
+        // when the detected string equals "foo/bar2". The assertion is fully
+        // qualified so that no additional static import is required.
+        org.junit.Assert.assertFalse(
+                "Pattern added with flag=false must not match " + testFileName,
+                "foo/bar2".equals(tika.detect(testFileName)));
+    }
+    
+    @Test
+    public void testRawDetection() throws Exception {
+        assertTypeByName("image/x-raw-adobe", "x.dng");
+        assertTypeByName("image/x-raw-adobe", "x.DNG");
+        assertTypeByName("image/x-raw-hasselblad", "x.3fr");
+        assertTypeByName("image/x-raw-fuji", "x.raf");
+        assertTypeByName("image/x-raw-canon", "x.crw");
+        assertTypeByName("image/x-raw-canon", "x.cr2");
+        assertTypeByName("image/x-raw-kodak", "x.k25");
+        assertTypeByName("image/x-raw-kodak", "x.kdc");
+        assertTypeByName("image/x-raw-kodak", "x.dcs");
+        assertTypeByName("image/x-raw-kodak", "x.drf");
+        assertTypeByName("image/x-raw-minolta", "x.mrw");
+        assertTypeByName("image/x-raw-nikon", "x.nef");
+        assertTypeByName("image/x-raw-nikon", "x.nrw");
+        assertTypeByName("image/x-raw-olympus", "x.orf");
+        assertTypeByName("image/x-raw-pentax", "x.ptx");
+        assertTypeByName("image/x-raw-pentax", "x.pef");
+        assertTypeByName("image/x-raw-sony", "x.arw");
+        assertTypeByName("image/x-raw-sony", "x.srf");
+        assertTypeByName("image/x-raw-sony", "x.sr2");
+        assertTypeByName("image/x-raw-sigma", "x.x3f");
+        assertTypeByName("image/x-raw-epson", "x.erf");
+        assertTypeByName("image/x-raw-mamiya", "x.mef");
+        assertTypeByName("image/x-raw-leaf", "x.mos");
+        assertTypeByName("image/x-raw-panasonic", "x.raw");
+        assertTypeByName("image/x-raw-panasonic", "x.rw2");
+        assertTypeByName("image/x-raw-phaseone", "x.iiq");
+        assertTypeByName("image/x-raw-red", "x.r3d");
+        assertTypeByName("image/x-raw-imacon", "x.fff");
+        assertTypeByName("image/x-raw-logitech", "x.pxn");
+        assertTypeByName("image/x-raw-casio", "x.bay");
+        assertTypeByName("image/x-raw-rawzor", "x.rwz");
+    }
+    
+    /**
+     * Tests that we correctly detect the font types. AFM is checked against
+     * a sample file; PFM, PFA and PFB have no sample files, so they are
+     * checked by extension and against hand-written header bytes.
+     */
+    @Test
+    public void testFontDetection() throws Exception {
+       assertTypeByName("application/x-font-adobe-metric", "x.afm");
+       assertTypeByData("application/x-font-adobe-metric", "testAFM.afm");
+
+       assertTypeByName("application/x-font-printer-metric", "x.pfm");
+       // TODO Get a sample .pfm file
+       // The third byte is 0xb1. It needs a cast because Java bytes are
+       // signed; the earlier spelling 256-0xb1 evaluates to 0x4f instead.
+       assertTypeByData(
+             "application/x-font-printer-metric",
+             new byte[] {0x00, 0x01, (byte) 0xb1, 0x0a, 0x00, 0x00, 0x43, 0x6f,
+                         0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20}
+       );
+
+       assertTypeByName("application/x-font-type1", "x.pfa");
+       // TODO Get a sample .pfa file
+       assertTypeByData(
+             "application/x-font-type1",
+             new byte[] {0x25, 0x21, 0x50, 0x53, 0x2d, 0x41, 0x64, 0x6f,
+                         0x62, 0x65, 0x46, 0x6f, 0x6e, 0x74, 0x2d, 0x31,
+                         0x2e, 0x30, 0x20, 0x20, 0x2d, 0x2a, 0x2d, 0x20}
+       );
+
+       assertTypeByName("application/x-font-type1", "x.pfb");
+       // TODO Get a sample .pfb file
+       // -0x80 is the signed-byte spelling of 0x80
+       assertTypeByData(
+             "application/x-font-type1",
+             new byte[] {-0x80, 0x01, 0x09, 0x05, 0x00, 0x00, 0x25, 0x21,
+                          0x50, 0x53, 0x2d, 0x41, 0x64, 0x6f, 0x62, 0x65,
+                          0x46, 0x6f, 0x6e, 0x74, 0x2d, 0x31, 0x2e, 0x30 }
+       );
+    }
+
+    /**
+     * Tests MimeTypes.getMimeType(URL), which examines both the byte header
+     * and, if necessary, the URL's extension.
+     */
+    @Test
+    public void testMimeDeterminationForTestDocuments() throws Exception {
+        assertType("text/html", "testHTML.html");
+        assertType("application/zip", "test-documents.zip");
+
+        assertType("text/html", "testHTML_utf8.html");
+        assertType(
+                "application/vnd.oasis.opendocument.text",
+                "testOpenOffice2.odt");
+        assertType("application/pdf", "testPDF.pdf");
+        assertType("application/rtf", "testRTF.rtf");
+        assertType("text/plain", "testTXT.txt");
+        assertType("application/xml", "testXML.xml");
+        assertType("audio/basic", "testAU.au");
+        assertType("audio/x-aiff", "testAIFF.aif");
+        assertType("audio/x-wav", "testWAV.wav");
+        assertType("audio/midi", "testMID.mid");
+        assertType("application/x-msaccess", "testACCESS.mdb");
+        assertType("application/x-font-ttf", "testTrueType3.ttf");
+    }
+    
+    /**
+     * The 7z sample must resolve to the same type by name, by data, and by
+     * both together.
+     */
+    @Test
+    public void test7ZipDetection() throws Exception {
+        final String sevenZip = "application/x-7z-compressed";
+        final String sample = "test-documents.7z";
+
+        assertTypeByName(sevenZip, sample);
+        assertTypeByData(sevenZip, sample);
+        assertTypeByNameAndData(sevenZip, sample);
+    }
+
+    /**
+     * Web archives: the bytes alone are expected to yield only the binary
+     * plist type; the web archive type requires the file name.
+     */
+    @Test
+    public void testWebArchiveDetection() throws Exception {
+        final String webArchive = "application/x-webarchive";
+        final String sample = "testWEBARCHIVE.webarchive";
+
+        assertTypeByName(webArchive, "x.webarchive");
+        assertTypeByData("application/x-bplist", sample);
+        assertTypeByNameAndData(webArchive, sample);
+    }
+
+    /**
+     * KML, and KMZ (zipped KML). KML resolves the same way by name, data or
+     * both; KMZ needs the name, because its bytes alone only identify a zip.
+     */
+    @Test
+    public void testKMLZDetection() throws Exception {
+        final String kml = "application/vnd.google-earth.kml+xml";
+        final String kmlSample = "testKML.kml";
+        assertTypeByName(kml, kmlSample);
+        assertTypeByData(kml, kmlSample);
+        assertTypeByNameAndData(kml, kmlSample);
+
+        final String kmz = "application/vnd.google-earth.kmz";
+        final String kmzSample = "testKMZ.kmz";
+        assertTypeByName(kmz, kmzSample);
+        assertTypeByNameAndData(kmz, kmzSample);
+
+        // By data only, mimetype magic only gets us to a .zip
+        // We need to use the Zip Aware detector to get the full type
+        assertTypeByData("application/zip", kmzSample);
+    }
+
+    /**
+     * InDesign and Photoshop samples each resolve to one type by name, by
+     * data, and by both.
+     */
+    @Test
+    public void testCreativeSuite() throws IOException {
+        final String inDesign = "application/x-adobe-indesign";
+        final String photoshop = "image/vnd.adobe.photoshop";
+
+        assertTypeDetection("testINDD.indd", inDesign, inDesign, inDesign);
+        assertTypeDetection("testPSD.psd", photoshop, photoshop, photoshop);
+    }
+    
+    /**
+     * AMR audio. Plain AMR resolves identically by name, data or both; the
+     * wide-band sample has the same extension, so only its data reveals the
+     * subtype.
+     */
+    @Test
+    public void testAMR() throws IOException {
+        final String amr = "audio/amr";
+        final String amrWideBand = "audio/amr-wb";
+
+        // AMR matches on name, data or both
+        assertTypeDetection("testAMR.amr", amr, amr, amr);
+
+        // AMR-WB subtype shares extension, so needs data to identify
+        assertTypeDetection("testAMR-WB.amr", amr, amrWideBand, amrWideBand);
+
+        // Ditto for the AMR-WB+ subtype, which we don't have a sample file of yet
+        //assertTypeDetection("testAMR-WB+.amr", "audio/amr", "audio/amr-wb+", "audio/amr-wb+");
+    }
+    
+    /**
+     * Mail formats: EMLX, and RFC 822 messages from GroupWise, Lotus and
+     * Thunderbird. The Thunderbird sample is only checked with name and
+     * data together.
+     */
+    @Test
+    public void testEmail() throws IOException {
+        final String emlx = "message/x-emlx";
+        final String rfc822 = "message/rfc822";
+
+        // EMLX
+        assertTypeDetection("testEMLX.emlx", emlx, emlx, emlx);
+
+        // Groupwise, then Lotus
+        for (String sample : new String[] {"testGroupWiseEml.eml", "testLotusEml.eml"}) {
+            assertTypeDetection(sample, rfc822, rfc822, rfc822);
+        }
+
+        // Thunderbird - doesn't currently work by name
+        assertTypeByNameAndData(rfc822, "testThunderbirdEml.eml");
+    }
+    
+    /**
+     * The AxCrypt sample resolves to one type by name, by data, and by both.
+     */
+    @Test
+    public void testAxCrypt() throws Exception {
+        // test-TXT.txt encrypted with a key of "tika"
+        final String axCrypt = "application/x-axcrypt";
+        assertTypeDetection("testTXT-tika.axx", axCrypt, axCrypt, axCrypt);
+    }
+    
+    /**
+     * Windows executables: .dll, .msi and .exe by name, a tiny PE sample by
+     * data, and a jar carrying a partial PE header that must not be taken
+     * for an executable.
+     */
+    @Test
+    public void testWindowsEXE() throws Exception {
+        assertTypeByName("application/x-msdownload", "x.dll");
+        assertTypeByName("application/x-ms-installer", "x.msi");
+        assertTypeByName("application/x-dosexec", "x.exe");
+
+        final String portableExecutable = "application/x-msdownload; format=pe";
+        final String peSample = "testTinyPE.exe";
+        assertTypeByData(portableExecutable, peSample);
+        assertTypeByNameAndData(portableExecutable, peSample);
+
+        // A jar file with part of a PE header, but not a full one
+        //  should still be detected as a zip or jar (without/with name)
+        final String jarSample = "testJAR_with_PEHDR.jar";
+        assertTypeByData("application/zip", jarSample);
+        assertTypeByNameAndData("application/java-archive", jarSample);
+    }
+    
+    /**
+     * Matroska: with the file name the sample is reported as video; from
+     * data alone only the generic container type is expected. Extensions
+     * for video and audio are checked in both cases.
+     */
+    @Test
+    public void testMatroskaDetection() throws Exception {
+        final String video = "video/x-matroska";
+        final String audio = "audio/x-matroska";
+        final String sample = "testMKV.mkv";
+
+        assertType(video, sample);
+        // TODO: Need custom detector data detection, see TIKA-1180
+        assertTypeByData("application/x-matroska", sample);
+        assertTypeByNameAndData(video, sample);
+
+        assertTypeByName(video, "x.mkv");
+        assertTypeByName(video, "x.MKV");
+        assertTypeByName(audio, "x.mka");
+        assertTypeByName(audio, "x.MKA");
+    }
+    
+    /**
+     * WebM: reported as video/webm whenever the name is available, but as
+     * the generic Matroska container type from data alone.
+     */
+    @Test
+    public void testWebMDetection() throws Exception {
+        final String webm = "video/webm";
+        final String sample = "testWEBM.webm";
+
+        assertType(webm, sample);
+        // TODO: Need custom detector data detection, see TIKA-1180
+        assertTypeByData("application/x-matroska", sample);
+        assertTypeByNameAndData(webm, sample);
+
+        assertTypeByName(webm, "x.webm");
+        assertTypeByName(webm, "x.WEBM");
+    }
+
+    /**
+     * Detection from a short byte prefix with empty metadata: the Unicode
+     * byte order marks and ordinary text bytes must be reported as
+     * text/plain, while a prefix containing other control bytes must be
+     * reported as application/octet-stream.
+     */
+    @Test
+    public void testGetMimeType_byteArray() throws IOException {
+        // Plain text detection
+        // UTF-16LE byte order mark
+        assertText(new byte[] { (byte) 0xFF, (byte) 0xFE });
+        // UTF-16BE byte order mark. This assertion previously repeated the
+        // little-endian mark, so the big-endian one was never exercised.
+        assertText(new byte[] { (byte) 0xFE, (byte) 0xFF });
+        // UTF-8 byte order mark
+        assertText(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });
+        assertText(new byte[] { 'a', 'b', 'c' });
+        // Tab, CR, LF, form feed (0x0C) and escape (0x1B) still count as text
+        assertText(new byte[] { '\t', '\r', '\n', 0x0C, 0x1B });
+        // 0x0E and 0x1C do not
+        assertNotText(new byte[] { '\t', '\r', '\n', 0x0E, 0x1C });
+    }
+    
+    @Test
+    public void testBerkeleyDB() throws IOException {
+        assertTypeByData(
+                "application/x-berkeley-db; format=btree; version=2", 
+                "testBDB_btree_2.db");
+        assertTypeByData(
+                "application/x-berkeley-db; format=btree; version=3", 
+                "testBDB_btree_3.db");
+        assertTypeByData(
+                "application/x-berkeley-db; format=btree; version=4", 
+                "testBDB_btree_4.db");
+        // V4 and V5 share the same btree format
+        assertTypeByData(
+                "application/x-berkeley-db; format=btree; version=4", 
+                "testBDB_btree_5.db");
+        
+        assertTypeByData(
+                "application/x-berkeley-db; format=hash; version=2", 
+                "testBDB_hash_2.db");
+        assertTypeByData(
+                "application/x-berkeley-db; format=hash; version=3", 
+                "testBDB_hash_3.db");
+        assertTypeByData(
+                "application/x-berkeley-db; format=hash; version=4", 
+                "testBDB_hash_4.db");
+        assertTypeByData(
+                "application/x-berkeley-db; format=hash; version=5", 
+                "testBDB_hash_5.db");
+    }
+    
+    /**
+     * CBOR typically contains HTML; the sample must still be reported as
+     * CBOR, both with its name and from data alone.
+     */
+    @Test
+    public void testCBOR() throws IOException {
+        final String cbor = "application/cbor";
+        final String sample = "NUTCH-1997.cbor";
+
+        assertTypeByNameAndData(cbor, sample);
+        assertTypeByData(cbor, sample);
+    }
+    
+    /**
+     * Each zlib sample must be detected from its bytes alone.
+     */
+    @Test
+    public void testZLIB() throws IOException {
+        // ZLIB encoded versions of testTXT.txt
+        final String zlib = "application/zlib";
+        final String[] samples = {
+            "testTXT.zlib", "testTXT.zlib0", "testTXT.zlib5", "testTXT.zlib9"
+        };
+        for (String sample : samples) {
+            assertTypeByData(zlib, sample);
+        }
+    }
+    
+    /**
+     * The BibTeX sample is detected with its name and from data alone.
+     */
+    @Test
+    public void testTextFormats() throws Exception {
+        final String bibtex = "application/x-bibtex-text-file";
+        final String sample = "testBIBTEX.bib";
+
+        assertType(bibtex, sample);
+        assertTypeByData(bibtex, sample);
+    }
+    
+    /**
+     * Source-code formats: C sources and headers and three Matlab samples
+     * with and without their names, Java source by name only, and Java
+     * properties with name and data.
+     */
+    @Test
+    public void testCodeFormats() throws Exception {
+        final String cSource = "text/x-csrc";
+        final String cHeader = "text/x-chdr";
+        assertType(cSource, "testC.c");
+        assertType(cHeader, "testH.h");
+        assertTypeByData(cSource, "testC.c");
+        assertTypeByData(cHeader, "testH.h");
+
+        assertTypeByName("text/x-java-source", "testJAVA.java");
+        assertType("text/x-java-properties", "testJAVAPROPS.properties");
+
+        final String matlab = "text/x-matlab";
+        final String[] matlabSamples = {
+            "testMATLAB.m", "testMATLAB_wtsgaus.m", "testMATLAB_barcast.m"
+        };
+        for (String sample : matlabSamples) {
+            assertType(matlab, sample);
+        }
+        for (String sample : matlabSamples) {
+            assertTypeByData(matlab, sample);
+        }
+    }
+
+    /**
+     * The WebVTT sample is detected with its name and from data alone.
+     */
+    @Test
+    public void testWebVTT() throws Exception {
+        final String vtt = "text/vtt";
+        final String sample = "testWebVTT.vtt";
+
+        assertType(vtt, sample);
+        assertTypeByData(vtt, sample);
+    }
+    
+    private void assertText(byte[] prefix) throws IOException {
+        assertMagic("text/plain", prefix);
+    }
+
+    private void assertNotText(byte[] prefix) throws IOException {
+        assertMagic("application/octet-stream", prefix);
+    }
+
+    /**
+     * Detects the type of an in-memory byte sequence using empty metadata
+     * and compares it with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param prefix bytes to run detection on
+     * @throws IOException if detection fails while reading the bytes
+     */
+    private void assertMagic(String expected, byte[] prefix) throws IOException {
+        ByteArrayInputStream bytes = new ByteArrayInputStream(prefix);
+        Metadata noHints = new Metadata();
+        MediaType detected = repo.detect(bytes, noHints);
+        assertNotNull(detected);
+        assertEquals(expected, detected.toString());
+    }
+
+    /**
+     * Detects a test document from its bytes together with its file name
+     * and compares the result with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param filename test document to open; also supplied as the resource
+     *                 name in the metadata
+     * @throws Exception if the document cannot be read or closed
+     */
+    private void assertType(String expected, String filename) throws Exception {
+        try (InputStream stream = getTestDocumentAsStream(filename)) {
+            // Fail loudly on a missing document. With a null stream the
+            // detector would be left with the name alone (as in
+            // assertTypeByName), so the assertion below could still pass
+            // without any content having been examined.
+            assertNotNull("Test document not found: " + filename, stream);
+            Metadata metadata = new Metadata();
+            metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+            assertEquals(expected, repo.detect(stream, metadata).toString());
+        }
+    }
+
+    /**
+     * Detects from a file name only (no stream is passed to the detector)
+     * and compares the result with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param filename name supplied as the resource name in the metadata
+     * @throws IOException if detection fails
+     */
+    private void assertTypeByName(String expected, String filename)
+            throws IOException {
+        Metadata nameOnly = new Metadata();
+        nameOnly.set(Metadata.RESOURCE_NAME_KEY, filename);
+        MediaType detected = repo.detect(null, nameOnly);
+        assertEquals(expected, detected.toString());
+    }
+
+    /**
+     * Detects a test document from its bytes only (the metadata is left
+     * empty) and compares the result with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param filename test document to open; not passed to the detector
+     * @throws IOException if the document cannot be read or closed
+     */
+    private void assertTypeByData(String expected, String filename)
+            throws IOException {
+        try (InputStream stream = getTestDocumentAsStream(filename)) {
+            // Fail with a clear message on a missing document instead of
+            // running detection with neither content nor name.
+            assertNotNull("Test document not found: " + filename, stream);
+            Metadata metadata = new Metadata();
+            assertEquals(expected, repo.detect(stream, metadata).toString());
+        }
+    }
+
+    /**
+     * Detects from an in-memory byte array only (the metadata is left
+     * empty) and compares the result with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param data bytes to run detection on
+     * @throws IOException if detection fails while reading the bytes
+     */
+    private void assertTypeByData(String expected, byte[] data)
+            throws IOException {
+        try (InputStream bytes = new ByteArrayInputStream(data)) {
+            Metadata noHints = new Metadata();
+            MediaType detected = repo.detect(bytes, noHints);
+            assertEquals(expected, detected.toString());
+        }
+    }
+
+    private void assertTypeDetection(String filename, String type)
+            throws IOException {
+        assertTypeDetection(filename, type, type, type);
+    }
+
+    /**
+     * Checks one test document in all three detection modes, in this order:
+     * name only, data only, then name and data together.
+     *
+     * @param filename test document to check
+     * @param byName type expected from the file name alone
+     * @param byData type expected from the bytes alone
+     * @param byNameAndData type expected when both are supplied
+     * @throws IOException if the document cannot be read
+     */
+    private void assertTypeDetection(String filename, String byName, String byData,
+            String byNameAndData) throws IOException {
+        // 1. name only
+        assertTypeByName(byName, filename);
+        // 2. data only
+        assertTypeByData(byData, filename);
+        // 3. name and data
+        assertTypeByNameAndData(byNameAndData, filename);
+    }
+
+    /**
+     * Detects a test document from its bytes together with its file name
+     * and compares the result with the expected type string.
+     *
+     * @param expected the media type string detection should produce
+     * @param filename test document to open and to name in the metadata
+     * @throws IOException if the document cannot be read
+     */
+    private void assertTypeByNameAndData(String expected, String filename)
+            throws IOException {
+        MediaType detected = getTypeByNameAndData(filename);
+        assertEquals(expected, detected.toString());
+    }
+
+    /**
+     * Opens a test document and runs detection on its bytes with the file
+     * name supplied as the resource name. Fails the test if the document
+     * cannot be found.
+     *
+     * @param filename test document to open and to name in the metadata
+     * @return the detected media type
+     * @throws IOException if the document cannot be read or closed
+     */
+    private MediaType getTypeByNameAndData(String filename) throws IOException {
+        try (InputStream document = getTestDocumentAsStream(filename)) {
+            assertNotNull("Test document not found: " + filename, document);
+
+            Metadata named = new Metadata();
+            named.set(Metadata.RESOURCE_NAME_KEY, filename);
+
+            MediaType detected = repo.detect(document, named);
+            return detected;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-app/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
new file mode 100644
index 0000000..91b054e
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -0,0 +1,459 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.sax.BodyContentHandler;
+import org.gagravarr.tika.FlacParser;
+import org.gagravarr.tika.OpusParser;
+import org.gagravarr.tika.VorbisParser;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class AutoDetectParserTest {
+    // Default configuration; passed to each AutoDetectParser these tests create.
+    private TikaConfig tika = TikaConfig.getDefaultConfig();
+
+    // Easy to read constants for the MIME types:
+    // (HTML, PLAINTEXT and UTF8TEXT carry a charset parameter as part of
+    // the expected content type string.)
+    private static final String RAW        = "application/octet-stream";
+    private static final String EXCEL      = "application/vnd.ms-excel";
+    private static final String HTML       = "text/html; charset=ISO-8859-1";
+    private static final String PDF        = "application/pdf";
+    private static final String POWERPOINT = "application/vnd.ms-powerpoint";
+    private static final String KEYNOTE    = "application/vnd.apple.keynote";
+    private static final String PAGES      = "application/vnd.apple.pages";
+    private static final String NUMBERS    = "application/vnd.apple.numbers";
+    private static final String CHM        = "application/vnd.ms-htmlhelp";
+    private static final String RTF        = "application/rtf";
+    private static final String PLAINTEXT  = "text/plain; charset=ISO-8859-1";
+    private static final String UTF8TEXT   = "text/plain; charset=UTF-8";
+    private static final String WORD       = "application/msword";
+    private static final String XML        = "application/xml";
+    private static final String RSS        = "application/rss+xml";
+    private static final String BMP        = "image/x-ms-bmp";
+    private static final String GIF        = "image/gif";
+    private static final String JPEG       = "image/jpeg";
+    private static final String PNG        = "image/png";
+    private static final String OGG_VORBIS = "audio/vorbis";
+    private static final String OGG_OPUS   = "audio/opus";
+    private static final String OGG_FLAC   = "audio/x-oggflac"; 
+    private static final String FLAC_NATIVE= "audio/x-flac";
+    private static final String OPENOFFICE
+            = "application/vnd.oasis.opendocument.text";
+
+
+    /**
+     * Runs a single scenario: parses the resource with an AutoDetectParser
+     * after seeding the metadata with the stated name and stated content
+     * type, then checks the content type left in the metadata and, when an
+     * expected fragment is given, that the extracted body text contains it.
+     *
+     * @param tp the scenario, bundled in a TestParams instance
+     * @throws Exception if the resource cannot be read or parsing fails
+     */
+    private void assertAutoDetect(TestParams tp) throws Exception {
+        try (InputStream resource = AutoDetectParserTest.class.getResourceAsStream(tp.resourceRealName)) {
+            if (resource == null) {
+                fail("Could not open stream from specified resource: "
+                        + tp.resourceRealName);
+            }
+
+            // Hints handed to the parser; either may be null or misleading
+            Metadata hints = new Metadata();
+            hints.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
+            hints.set(Metadata.CONTENT_TYPE, tp.statedType);
+
+            ContentHandler body = new BodyContentHandler();
+            AutoDetectParser parser = new AutoDetectParser(tika);
+            parser.parse(resource, body, hints);
+
+            String reportedType = hints.get(Metadata.CONTENT_TYPE);
+            assertEquals("Bad content type: " + tp, tp.realType, reportedType);
+
+            String fragment = tp.expectedContentFragment;
+            if (fragment != null) {
+                String text = body.toString();
+                assertTrue("Expected content not found: " + tp,
+                        text.contains(fragment));
+            }
+        }
+    }
+
+    /**
+     * Readability wrapper: packs its arguments into a TestParams instance
+     * and runs the single-scenario check on it.
+     *
+     * @param resourceRealName real name of resource
+     * @param resourceStatedName stated name -- will a bad name fool us?
+     * @param realType the real MIME type
+     * @param statedType stated MIME type -- will a wrong one fool us?
+     * @param expectedContentFragment something expected in the text
+     * @throws Exception if the resource cannot be read or parsing fails
+     */
+    private void assertAutoDetect(String resourceRealName,
+                                  String resourceStatedName,
+                                  String realType,
+                                  String statedType,
+                                  String expectedContentFragment)
+            throws Exception {
+        TestParams scenario = new TestParams(
+                resourceRealName, resourceStatedName,
+                realType, statedType, expectedContentFragment);
+        assertAutoDetect(scenario);
+    }
+
+    /**
+     * Checks one test document under nine combinations of hints: the stated
+     * name is the real path, absent, or a bogus "a.xyz", and for each of
+     * those the stated content type is correct, absent, or the generic
+     * octet-stream type. The real type and content are expected every time.
+     *
+     * @param resource file name under /test-documents/
+     * @param type the content type the parser should report
+     * @param content text expected in the extracted body
+     * @throws Exception if the resource cannot be read or parsing fails
+     */
+    private void assertAutoDetect(
+            String resource, String type, String content) throws Exception {
+        final String path = "/test-documents/" + resource;
+
+        // Outer loop: stated name; inner loop: stated content type
+        final String[] statedNames = {path, null, "a.xyz"};
+        final String[] statedTypes = {type, null, RAW};
+
+        for (String statedName : statedNames) {
+            for (String statedType : statedTypes) {
+                assertAutoDetect(path, statedName, type, statedType, content);
+            }
+        }
+    }
+
+    /** Keynote sample: reported type and an expected text fragment. */
+    @Test
+    public void testKeynote() throws Exception {
+        final String sample = "testKeynote.key";
+        final String fragment = "A sample presentation";
+        assertAutoDetect(sample, KEYNOTE, fragment);
+    }
+
+    /** Pages sample: reported type and an expected text fragment. */
+    @Test
+    public void testPages() throws Exception {
+        final String sample = "testPages.pages";
+        final String fragment = "Sample pages document";
+        assertAutoDetect(sample, PAGES, fragment);
+    }
+
+    /** Numbers sample: reported type and an expected text fragment. */
+    @Test
+    public void testNumbers() throws Exception {
+        final String sample = "testNumbers.numbers";
+        final String fragment = "Checking Account: 300545668";
+        assertAutoDetect(sample, NUMBERS, fragment);
+    }
+
+    /** CHM sample: reported type and an expected sentence from the text. */
+    @Test
+    public void testChm() throws Exception {
+        final String sample = "testChm.chm";
+        final String fragment =
+                "If you do not specify a window type or a window name, the main window is used.";
+        assertAutoDetect(sample, CHM, fragment);
+    }
+
+    @Test
+    public void testEpub() throws Exception {
+        // no shared constant is used for this type, so it is spelled out here
+        final String epubType = "application/epub+zip";
+        assertAutoDetect("testEPUB.epub", epubType, "The previous headings were subchapters");
+    }
+
+    @Test
+    public void testExcel() throws Exception { // testEXCEL.xls must be detected as EXCEL under every stated name/type hint
+        assertAutoDetect("testEXCEL.xls", EXCEL, "Sample Excel Worksheet");
+    }
+
+    @Test
+    public void testHTML() throws Exception { // testHTML.html must be detected as HTML under every stated name/type hint
+        assertAutoDetect("testHTML.html", HTML, "Test Indexation Html");
+    }
+
+    @Test
+    public void testOpenOffice() throws Exception { // the .odt sample must be detected as OPENOFFICE under every stated name/type hint
+        assertAutoDetect("testOpenOffice2.odt", OPENOFFICE,
+                "This is a sample Open Office document"); // expected content fragment
+    }
+
+    @Test
+    public void testPDF() throws Exception { // testPDF.pdf must be detected as PDF under every stated name/type hint
+        assertAutoDetect("testPDF.pdf", PDF, "Content Analysis Toolkit");
+
+    }
+
+    @Test
+    public void testPowerpoint() throws Exception { // testPPT.ppt must be detected as POWERPOINT under every stated name/type hint
+        assertAutoDetect("testPPT.ppt", POWERPOINT, "Sample Powerpoint Slide");
+    }
+
+    @Test
+    public void testRdfXml() throws Exception { // testRDF.rdf must be detected as application/rdf+xml under every stated name/type hint
+        assertAutoDetect("testRDF.rdf", "application/rdf+xml", ""); // NOTE(review): empty fragment, so presumably only the type is really checked - confirm
+    }
+
+    @Test
+    public void testRTF() throws Exception { // testRTF.rtf must be detected as RTF under every stated name/type hint
+        assertAutoDetect("testRTF.rtf", RTF, "indexation Word");
+    }
+
+    @Test
+    public void testText() throws Exception { // testTXT.txt must be detected as PLAINTEXT under every stated name/type hint
+        assertAutoDetect("testTXT.txt", PLAINTEXT, "indexation de Txt");
+    }
+    
+    @Test
+    public void testTextNonASCIIUTF8() throws Exception { // the non-ASCII sample must be detected as UTF8TEXT under every stated name/type hint
+        assertAutoDetect("testTXTNonASCIIUTF8.txt", UTF8TEXT, "The quick brown fox jumps over the lazy dog");
+    }
+
+    @Test
+    public void testWord() throws Exception { // testWORD.doc must be detected as WORD under every stated name/type hint
+        assertAutoDetect("testWORD.doc", WORD, "Sample Word Document");
+    }
+
+    @Test
+    public void testXML() throws Exception { // testXML.xml must be detected as XML under every stated name/type hint
+        assertAutoDetect("testXML.xml", XML, "Lius");
+    }
+
+    @Test
+    public void testRss() throws Exception { // single combination only: uses the five-argument form directly, with an already-prefixed path
+        assertAutoDetect("/test-documents/rsstest.rss", "feed", RSS, "application/rss+xml", "Sample RSS File for Junit test"); // real path, stated name, real type, stated type, expected fragment
+    }
+    
+    @Test
+    public void testImages() throws Exception {
+       assertAutoDetect("testBMP.bmp", BMP, null);
+       assertAutoDetect("testGIF.gif", GIF, null);
+       assertAutoDetect("testJPEG.jpg", JPEG, null);
+       assertAutoDetect("testPNG.png", PNG, null);
+   }
+
+    /**
+     * The TIKA-216 archive is a zip bomb: parsing it must end in a
+     * TikaException instead of completing normally.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
+     */
+    @Test
+    public void testZipBombPrevention() throws Exception {
+        final String bomb = "/test-documents/TIKA-216.tgz";
+        try (InputStream archive = AutoDetectParserTest.class.getResourceAsStream(bomb)) {
+            AutoDetectParser parser = new AutoDetectParser(tika);
+            parser.parse(archive, new BodyContentHandler(-1), new Metadata());
+            fail("Zip bomb was not detected");
+        } catch (TikaException expected) {
+            // the parser refused the archive, which is the desired outcome
+        }
+    }
+
+    /**
+     * A zip holding ten empty (hence invalid) XML entries must parse without
+     * any exception, in particular without a false zip bomb alarm.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-1322">TIKA-1322</a>
+     */
+    @Test
+    public void testNoBombDetectedForInvalidXml() throws Exception {
+        ByteArrayOutputStream zipBytes = new ByteArrayOutputStream();
+        // closing the zip stream also finishes the archive
+        try (ZipOutputStream zip = new ZipOutputStream(zipBytes)) {
+            for (int n = 1; n <= 10; n++) {   // entries 1.xml .. 10.xml, all empty
+                zip.putNextEntry(new ZipEntry(n + ".xml"));
+                zip.closeEntry();
+            }
+        }
+        InputStream zipped = new ByteArrayInputStream(zipBytes.toByteArray());
+        new AutoDetectParser(tika).parse(zipped, new BodyContentHandler(-1), new Metadata());
+    }
+
+    /**
+     * Test to ensure that the Ogg Audio parsers (Vorbis, Opus, Flac etc) have been
+     *  correctly included and are available, then parses one sample file per type
+     */
+    @SuppressWarnings("deprecation") // presumably for the old-style Metadata.AUTHOR / Metadata.TITLE keys used below - confirm
+    @Test
+    public void testOggFlacAudio() throws Exception {
+       // The four test files should all carry similar test data; testFiles[i] pairs with mediaTypes[i]
+       String[] testFiles = new String[] {
+             "testVORBIS.ogg", "testFLAC.flac", "testFLAC.oga",
+             "testOPUS.opus"
+       };
+       MediaType[] mediaTypes = new MediaType[] {
+               MediaType.parse(OGG_VORBIS), MediaType.parse(FLAC_NATIVE),
+               MediaType.parse(OGG_FLAC), MediaType.parse(OGG_OPUS)
+       };
+       
+       // Check the parsers load. NOTE(review): these only assert the supported-types set is non-null, not that it contains the type
+       VorbisParser vParser = new VorbisParser();
+       assertNotNull("Parser not found for " + mediaTypes[0], 
+                     vParser.getSupportedTypes(new ParseContext()));
+       
+       FlacParser fParser = new FlacParser(); // one parser class is checked for both the native and the Ogg FLAC type
+       assertNotNull("Parser not found for " + mediaTypes[1], 
+                     fParser.getSupportedTypes(new ParseContext()));
+       assertNotNull("Parser not found for " + mediaTypes[2], 
+                     fParser.getSupportedTypes(new ParseContext()));
+       
+       OpusParser oParser = new OpusParser();
+       assertNotNull("Parser not found for " + mediaTypes[3], 
+                     oParser.getSupportedTypes(new ParseContext()));
+       
+       // Check the configured composite parser has a parser registered for every one of the four types
+       CompositeParser parser = (CompositeParser)tika.getParser();
+       for (MediaType mt : mediaTypes) {
+          assertNotNull("Parser not found for " + mt, parser.getParsers().get(mt) );
+       }
+       
+       // Have each file parsed, and check type, metadata and text
+       for (int i=0; i<testFiles.length; i++) {
+           String file = testFiles[i];
+           try (InputStream input = AutoDetectParserTest.class.getResourceAsStream(
+                   "/test-documents/" + file)) {
+               if (input == null) { // getResourceAsStream found nothing under that name
+                   fail("Could not find test file " + file);
+               }
+               Metadata metadata = new Metadata();
+               ContentHandler handler = new BodyContentHandler();
+               new AutoDetectParser(tika).parse(input, handler, metadata);
+
+               assertEquals("Incorrect content type for " + file,
+                       mediaTypes[i].toString(), metadata.get(Metadata.CONTENT_TYPE));
+
+               // Check some of the common metadata
+               // Old style metadata
+               assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+               assertEquals("Test Title", metadata.get(Metadata.TITLE));
+               // New style metadata
+               assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+               assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+
+               // Check some of the XMPDM metadata (the album is not asserted for the .opus sample)
+               if (!file.endsWith(".opus")) {
+                   assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
+               }
+               assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
+               assertEquals("Stereo", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
+               assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
+
+               // Check some of the text: title and artist must also appear in the handler output
+               String content = handler.toString();
+               assertTrue(content.contains("Test Title"));
+               assertTrue(content.contains("Test Artist"));
+           }
+       }
+    }
+    
+    /**
+     * TIKA-514: an AutoDetectParser built from an explicit detector and an
+     * explicit parser list must hand the input to that parser.
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-514">TIKA-514</a>
+     */
+    @Test
+    public void testSpecificParserList() throws Exception {
+        Metadata seen = new Metadata();
+        InputStream input = new ByteArrayInputStream("test".getBytes(UTF_8));
+        // MyDetector always answers with the type that MyParser supports
+        AutoDetectParser onlyMine = new AutoDetectParser(new MyDetector(), new MyParser());
+        onlyMine.parse(input, new BodyContentHandler(), seen, new ParseContext());
+
+        assertEquals("value", seen.get("MyParser")); // marker that MyParser.parse() writes
+    }
+
+    private static final MediaType MY_MEDIA_TYPE = new MediaType("application", "x-myparser"); // returned by MyDetector and supported by MyParser
+    
+    /**
+     * A test detector which ignores its arguments and always returns
+     *  MY_MEDIA_TYPE, the type supported by the test parser
+     */
+    @SuppressWarnings("serial")
+    private static class MyDetector implements Detector {
+        public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+            return MY_MEDIA_TYPE; // neither the stream nor the metadata is consulted
+        }
+    }
+    
+    @SuppressWarnings("serial")
+    private static class MyParser extends AbstractParser {
+        /** Reports MY_MEDIA_TYPE as the single supported type. */
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            // copied into a HashSet so callers still receive a fresh, mutable set
+            return new HashSet<MediaType>(java.util.Collections.singleton(MY_MEDIA_TYPE));
+        }
+
+        /** Reads nothing from the stream; only records the marker entry in the metadata. */
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) {
+            metadata.add("MyParser", "value");
+        }
+    }
+    
+    /**
+     * Minimal class to encapsulate all parameters -- the main reason for
+     * its existence is to aid in debugging via its toString() method.
+     *
+     * Getters and setters intentionally not provided.
+     */
+    private static class TestParams {
+
+        public String resourceRealName;
+        public String resourceStatedName;
+        public String realType;
+        public String statedType;
+        public String expectedContentFragment;
+
+
+        private TestParams(String resourceRealName,
+                           String resourceStatedName,
+                           String realType,
+                           String statedType,
+                           String expectedContentFragment) {
+            this.resourceRealName = resourceRealName;
+            this.resourceStatedName = resourceStatedName;
+            this.realType = realType;
+            this.statedType = statedType;
+            this.expectedContentFragment = expectedContentFragment;
+        }
+
+
+        /**
+         * Produces a string like the following:
+         *
+         * <pre>
+         * Test parameters:
+         *   resourceRealName        = /test-documents/testEXCEL.xls
+         *   resourceStatedName      = null
+         *   realType                = application/vnd.ms-excel
+         *   statedType              = null
+         *   expectedContentFragment = Sample Excel Worksheet
+         * </pre>
+         */
+        public String toString() {
+            return "Test parameters:\n"
+                + "  resourceRealName        = " + resourceRealName + "\n"
+                + "  resourceStatedName      = " + resourceStatedName + "\n"
+                + "  realType                = " + realType + "\n"
+                + "  statedType              = " + statedType + "\n"
+                + "  expectedContentFragment = " + expectedContentFragment + "\n";
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/DigestingParserTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/DigestingParserTest.java b/tika-app/src/test/java/org/apache/tika/parser/DigestingParserTest.java
new file mode 100644
index 0000000..66323d3
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/DigestingParserTest.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.digesting.CommonsDigester;
+import org.junit.Test;
+
+
+/** Checks the digests that DigestingParser with CommonsDigester stores in the metadata. */
+public class DigestingParserTest extends TikaTest {
+
+    /** Prefix of the metadata keys read back below; the algorithm name is appended to it. */
+    private final static String P = TikaCoreProperties.TIKA_META_PREFIX+
+            "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+
+    private final int UNLIMITED = 1000000;//well, not really, but longer than input file
+    private final Parser p = new AutoDetectParser();
+
+    /** Every algorithm on its own, then a comma-separated subset, must give the known digests. */
+    @Test
+    public void testBasic() throws Exception {
+        Map<CommonsDigester.DigestAlgorithm, String> expected = new HashMap<>();
+
+        expected.put(CommonsDigester.DigestAlgorithm.MD2,"d768c8e27b0b52c6eaabfaa7122d1d4f");
+        expected.put(CommonsDigester.DigestAlgorithm.MD5,"59f626e09a8c16ab6dbc2800c685f772");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA1,"7a1f001d163ac90d8ea54c050faf5a38079788a6");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA256,"c4b7fab030a8b6a9d6691f6699ac8e6f" +
+                                                            "82bc53764a0f1430d134ae3b70c32654");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA384,"ebe368b9326fef44408290724d187553"+
+                                                            "8b8a6923fdf251ddab72c6e4b5d54160" +
+                                                            "9db917ba4260d1767995a844d8d654df");
+        expected.put(CommonsDigester.DigestAlgorithm.SHA512,"ee46d973ee1852c018580c242955974d"+
+                                                            "da4c21f36b54d7acd06fcf68e974663b"+
+                                                            "fed1d256875be58d22beacf178154cc3"+
+                                                            "a1178cb73443deaa53aa0840324708bb");
+
+        //test each one
+        for (CommonsDigester.DigestAlgorithm algo : CommonsDigester.DigestAlgorithm.values()) {
+            Metadata m = new Metadata();
+            getXML("test_recursive_embedded.docx", new DigestingParser(p, new CommonsDigester(UNLIMITED, algo)), m);
+            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
+        }
+
+        //test comma separated
+        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse("md5,sha256,sha384,sha512");
+        Metadata m = new Metadata();
+        getXML("test_recursive_embedded.docx", new DigestingParser(p, new CommonsDigester(UNLIMITED, algos)), m);
+        for (CommonsDigester.DigestAlgorithm algo : new CommonsDigester.DigestAlgorithm[]{
+                CommonsDigester.DigestAlgorithm.MD5, CommonsDigester.DigestAlgorithm.SHA256,
+                CommonsDigester.DigestAlgorithm.SHA384, CommonsDigester.DigestAlgorithm.SHA512}) {
+            assertEquals(algo.toString(), expected.get(algo), m.get(P + algo.toString()));
+        }
+        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.MD2.toString())); // not in the parsed list, so no entry is expected
+        assertNull(m.get(P+CommonsDigester.DigestAlgorithm.SHA1.toString()));
+    }
+
+    /** The MD5 of the first 100 bytes alone must equal the MD5 reported for the whole file when the digester limit is 100. */
+    @Test
+    public void testLimitedRead() throws Exception {
+        CommonsDigester.DigestAlgorithm algo = CommonsDigester.DigestAlgorithm.MD5;
+        int limit = 100;
+        byte[] bytes = new byte[limit];
+        int filled = 0;
+        // one read() call may deliver fewer bytes than asked for, so keep reading until the prefix is complete
+        try (InputStream is = getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
+            int n;
+            while (filled < limit && (n = is.read(bytes, filled, limit - filled)) != -1) {
+                filled += n;
+            }
+        }
+        assertEquals("fixture shorter than the digest limit", limit, filled);
+        Metadata m = new Metadata();
+        try {
+            getXML(TikaInputStream.get(bytes), new DigestingParser(p, new CommonsDigester(limit, algo)), m);
+        } catch (TikaException e) {
+            //thrown because this is just a file fragment
+            assertContains("Unexpected RuntimeException from org.apache.tika.parser.microsoft.ooxml.OOXMLParser",
+                    e.getMessage());
+        }
+        String expectedMD5 = m.get(P+"MD5");
+        m = new Metadata();
+        getXML("test_recursive_embedded.docx", new DigestingParser(p, new CommonsDigester(limit, algo)), m);
+        assertEquals(expectedMD5, m.get(P+"MD5"));
+    }
+
+    /** With a digester limit of 100 the file must yield the fixed MD5 below. NOTE(review): presumably guards the stream reset after digesting - confirm. */
+    @Test
+    public void testReset() throws Exception {
+        String expectedMD5 = "1643c2cef21e36720c54f4f6cb3349d0";
+        Metadata m = new Metadata();
+        getXML("test_recursive_embedded.docx",
+                new DigestingParser(p, new CommonsDigester(100, CommonsDigester.DigestAlgorithm.MD5)), m);
+        assertEquals(expectedMD5, m.get(P+"MD5"));
+    }
+
+    /** A negative limit handed to CommonsDigester must lead to an IllegalArgumentException. */
+    @Test
+    public void testNegativeMaxMarkLength() throws Exception {
+        Metadata m = new Metadata();
+        boolean ex = false;
+        try {
+            getXML("test_recursive_embedded.docx",
+                    new DigestingParser(p, new CommonsDigester(-1, CommonsDigester.DigestAlgorithm.MD5)), m);
+        } catch (IllegalArgumentException e) {
+            ex = true;
+        }
+        assertTrue("Exception not thrown", ex);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/ParsingReaderTest.java b/tika-app/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
new file mode 100644
index 0000000..71c07b7
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.Reader;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Test;
+
+/**
+ * Reads parsed text back through ParsingReader one character at a time and
+ * compares it with the expected output.
+ */
+public class ParsingReaderTest {
+
+    /**
+     * Plain text must come back followed by one newline, and closing the
+     * reader must leave the wrapped stream at end of input.
+     */
+    @Test
+    public void testPlainText() throws Exception {
+        InputStream source = new ByteArrayInputStream("test content".getBytes(UTF_8));
+        Reader parsed = new ParsingReader(source, "test.txt");
+
+        // expected output: the input text plus a single trailing newline
+        final String expected = "test content\n";
+        for (int pos = 0; pos < expected.length(); pos++) {
+            assertEquals(expected.charAt(pos), parsed.read());
+        }
+        // nothing may follow the expected text
+        assertEquals(-1, parsed.read());
+
+        // after close() the wrapped stream has nothing left to give
+        parsed.close();
+        assertEquals(-1, source.read());
+    }
+
+    /**
+     * The XML snippet must come back as the text below: a leading space, a
+     * doubled space before "content" and a trailing newline.
+     */
+    @Test
+    public void testXML() throws Exception {
+        InputStream source = new ByteArrayInputStream(
+                "<p>test <span>content</span></p>".getBytes(UTF_8));
+        Reader parsed = new ParsingReader(source, "test.xml");
+
+        // expected characters, in the order the reader must deliver them
+        final String expected = " test  content\n";
+        for (int pos = 0; pos < expected.length(); pos++) {
+            assertEquals(expected.charAt(pos), (char) parsed.read());
+        }
+        // nothing may follow the expected text
+        assertEquals(-1, parsed.read());
+
+        // after close() the wrapped stream has nothing left to give
+        parsed.close();
+        assertEquals(-1, source.read());
+    }
+
+    /**
+     * TIKA-203: the metadata handed to ParsingReader must already hold the
+     * document title once the reader exists, before any character is read.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-203">TIKA-203</a>
+     */
+    @Test
+    public void testMetadata() throws Exception {
+        InputStream workbook = ParsingReaderTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL.xls");
+        Metadata metadata = new Metadata();
+        try (Reader parsed = new ParsingReader(
+                new AutoDetectParser(), workbook, metadata, new ParseContext())) {
+            // checked before the first read(): the title must already be there
+            assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
+
+            // Check that the internal buffering isn't broken: the text must start with "Feuil1"
+            for (char expected : "Feuil1".toCharArray()) {
+                assertEquals(expected, (char) parsed.read());
+            }
+        }
+    }
+
+}


[09/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif b/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
deleted file mode 100644
index e131add..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
+++ /dev/null
@@ -1,56 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
-          <Entry_ID>02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c</Entry_ID>
-          <Entry_Title>Barrow Logger Data NIMS 2014</Entry_Title>
-
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>BIOSPHERE</Topic>
-            <Term>ECOLOGICAL DYNAMICS</Term>
-          </Parameters>
-
-
-          <Spatial_Coverage>
-            <Southernmost_Latitude>70</Southernmost_Latitude>
-            <Northernmost_Latitude>72</Northernmost_Latitude>
-            <Westernmost_Longitude>-162</Westernmost_Longitude>
-            <Easternmost_Longitude>-150</Easternmost_Longitude>
-          </Spatial_Coverage>
-
-          <Data_Center>
-            <Data_Center_Name>
-              <Short_Name>ACADIS</Short_Name>
-              <Long_Name>Advanced Cooperative Arctic Data and Information Service</Long_Name>
-            </Data_Center_Name>
-            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
-            <Personnel>
-              <Role>DATA CENTER CONTACT</Role>
-              <First_Name>ACADIS</First_Name>
-              <Last_Name>User Services</Last_Name>
-              <Contact_Address>
-                <Address>NCAR/CISL</Address>
-                <Address>P.O. Box 3000</Address>
-                <City>Boulder</City>
-                <Province_or_State>CO</Province_or_State>
-                <Postal_Code>80307</Postal_Code>
-                <Country>USA</Country>
-              </Contact_Address>
-            </Personnel>
-          </Data_Center>
-
-          <Summary>
-            <Abstract>Logger records from the Networked Info-mechanical Systems (NIMS), Transect length: ~50m The data was recorded using a CR3000 logger. The sensor trolley was equipped with instruments for recording the distance to vegetation canopy (SR50a Sonic Distance, Campbell Scientific), up- and downwelling short- and longwave radiation (CNR4 net radiometer, Kipp &amp; Zonen), air temperature and surface temperature (SI-111 IR radiometer, Apogee Instruments Inc.) and spectral reflection (Jaz Combo-2, Ocean Optics; GreenSeeker RT100 (505), NTech).</Abstract>
-          </Summary>
-
-          <Related_URL>
-            <URL_Content_Type>
-              <Type>GET DATA</Type>
-            </URL_Content_Type>
-            <URL>http://www.aoncadis.org/dataset/id/02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c.html</URL>
-            <Description>Data Center top-level access page for this resource</Description>
-          </Related_URL>
-
-          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
-          <Metadata_Version>9.8.4</Metadata_Version>
-          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
-        </DIF>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg b/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
deleted file mode 100644
index d68ff55..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
-  <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
-  </svg:g>
-</svg:svg>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/circles.svg b/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
deleted file mode 100644
index 8b71e82..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
-  <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
-    <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
-    <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
-    <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
-  </g>
-</svg>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/datamatrix.png
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/datamatrix.png b/tika-core/src/test/resources/org/apache/tika/mime/datamatrix.png
deleted file mode 100644
index 4aa5003..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/datamatrix.png and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/gdas1.forecmwf.2014062612.grib2
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/gdas1.forecmwf.2014062612.grib2 b/tika-core/src/test/resources/org/apache/tika/mime/gdas1.forecmwf.2014062612.grib2
deleted file mode 100644
index 7ab3416..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/gdas1.forecmwf.2014062612.grib2 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/htmlfragment
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/htmlfragment b/tika-core/src/test/resources/org/apache/tika/mime/htmlfragment
deleted file mode 100644
index bf36d08..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/htmlfragment
+++ /dev/null
@@ -1,18 +0,0 @@
-<div id="leftcol">
-	  <ul>
-        <li><a href="/mission/sec/sec.html"> Security and Information Sciences Home&nbsp;&rsaquo;</a>        </li>
-        <li><a href="/mission/sec/publications/-publications.html">Publications&nbsp;&rsaquo;</a> </li>
-        <li><a href="/mission/sec/corpora/corpora.html">Corpora&nbsp;&rsaquo;</a> </li>
-        <li><a href="/mission/sec/softwaretools/tools.html">Software Tools&nbsp;&rsaquo;</a></li>
-        <li><a href="/mission/sec/CSO/CSO.html"> Systems and Operations&nbsp;&rsaquo;</a>
-          <ul>
-            <li><a href="/mission/sec/publications/-publications.html">Publications &rsaquo;</a></li>
-            <li><a href="/mission/sec/CSO/biographies/CSObios.html">Biographies&nbsp;&rsaquo;</a></li>
-          </ul>
-        </li>
-        <li><a href="/mission/sec/CST/CST.html"> Systems and Technology&nbsp;&rsaquo;</a> </li>
-        <li><a href="/mission/sec/CSA/CSA.html"> System Assessments&nbsp;&rsaquo;</a> </li>
-	    <li><a href="/mission/sec/HLT/HLT.html">Human Language Technology&nbsp;&rsaquo;</a>
-<li><a href="/mission/sec/computing/computing.html">Computing and Analytics&nbsp;&rsaquo;</a></li>
-  </ul>
-</div>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/plotutils-bin-cgm-v3.cgm
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/plotutils-bin-cgm-v3.cgm b/tika-core/src/test/resources/org/apache/tika/mime/plotutils-bin-cgm-v3.cgm
deleted file mode 100644
index 450f5ad..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/plotutils-bin-cgm-v3.cgm and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl b/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
deleted file mode 100644
index d704f07..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
+++ /dev/null
@@ -1,9 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
-
-  <xsl:output method="xml" indent="yes"/>
-
-  <xsl:template match="/">
-    <test hello="world"/>
-  </xsl:template>
-</xsl:stylesheet>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf1.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf1.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf1.xml
deleted file mode 100644
index dc88dcf..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf1.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<?xml version='1.0' encoding='ISO-8859-1'?>
-
-<!DOCTYPE uridef[
-  <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns">
-  <!ENTITY shadow-rdf "http://www.daml.org/services/owl-s/1.2/generic/ObjectList.owl">
-  <!ENTITY expr "http://www.daml.org/services/owl-s/1.2/generic/Expression.owl">
-  <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema">
-  <!ENTITY owl "http://www.w3.org/2002/07/owl">
-  <!ENTITY xsd "http://www.w3.org/2001/XMLSchema">
-  <!ENTITY time "http://www.isi.edu/~hobbs/damltime/time-entry.owl">
-  <!ENTITY swrl "http://www.w3.org/2003/11/swrl">
-  <!ENTITY service "http://www.daml.org/services/owl-s/1.2/Service.owl">
-  <!ENTITY grounding "http://www.daml.org/services/owl-s/1.2/Grounding.owl">
-  <!ENTITY process "http://www.daml.org/services/owl-s/1.2/Process.owl">
-  <!ENTITY DEFAULT "http://www.daml.org/services/owl-s/1.2/Process.owl">
-]>
-
-
-<rdf:RDF
-  xmlns:rdf=    "&rdf;#"
-  xmlns:shadow-rdf= "&shadow-rdf;#"
-  xmlns:expr= "&expr;#"
-  xmlns:rdfs=   "&rdfs;#"
-  xmlns:owl= "&owl;#"
-  xmlns:swrl= "&swrl;#"
-  xmlns:xsd= "&xsd;#"
-  xmlns:service= "&service;#"
-  xmlns:process= "&process;#"
-  xmlns:grounding= "&grounding;#"
-  xmlns=        "&DEFAULT;#"
-  xml:base="&process;">
-
-<!--
-  TIKA-309: Mime type application/rdf+xml not correctly detected
-  Simplified test case based on the OWL document at
-  http://www.ai.sri.com/daml/services/owl-s/1.2/Process.owl
--->
-
-</rdf:RDF>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf2.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf2.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf2.xml
deleted file mode 100644
index 0f8fe28..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-difficult-rdf2.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-<!-- This is the OWL 2 Namespace Document, sometimes
-     called the "owl.owl" file.
-
-     For some commentary about its creation, see
-     http://www.w3.org/2007/OWL/wiki/Owl2DotOwlDevel
-
-     This was created from the 16 Oct 2009 version of
-     that page, with the turtle-to-rdf/xml conversion
-     done by cwm, and the conversion to XML entity
-     references done by hand. The GRDDL triple and
-     namespace have also been added by hand
-
-     The real OWL 1 and OWL 2 namespace is:
-          http://www.w3.org/2002/07/owl#
-
--->
-<!DOCTYPE rdf:RDF [
-
-<!ENTITY location "http://www.w3.org/2002/07/owl" >
-<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
-<!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
-<!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
-<!ENTITY dc "http://purl.org/dc/elements/1.1/" >
-<!ENTITY grddl "http://www.w3.org/2003/g/data-view#" >
-<!ENTITY owl "&location;#" >
-
-]>
-<rdf:RDF
-    xml:base ="&location;"
-    xmlns:rdf ="&rdf;"
-    xmlns:rdfs="&rdfs;"
-    xmlns:xsd = "&xsd;"
-    xmlns:owl ="&owl;"
-    xmlns:dc = "&dc;"
-    xmlns:grddl = "&grddl;"
-    >
-
-<!--
-  TIKA-309: Mime type application/rdf+xml not correctly detected
-  Simplified test case based on the OWL 2 Namespace Document at
-  http://www.w3.org/2002/07/owl#
--->
-
-</rdf:RDF>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
deleted file mode 100644
index 7573369..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
+++ /dev/null
@@ -1,2 +0,0 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
deleted file mode 100644
index 84844ec..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<?somepi blahblah test="ignore-me.xml" ?>
-<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-malformed-header.html.bin
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-malformed-header.html.bin b/tika-core/src/test/resources/org/apache/tika/mime/test-malformed-header.html.bin
deleted file mode 100644
index 069ee51..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/test-malformed-header.html.bin and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-tika-327.html
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-tika-327.html b/tika-core/src/test/resources/org/apache/tika/mime/test-tika-327.html
deleted file mode 100644
index fe9d04b..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-tika-327.html
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version="1.0" encoding="iso-8859-1"?><link href="http://www.apache.org" rel="stylesheet" type="text/css" />
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
-<title>title</title>
-<meta name="description" content="content" />
-<meta name="keywords" content="keys" />
-<script language="JavaScript" type="text/javascript">
-<!--
-function hello() {
-}
-//-->
-
-
-</script>
-
-<!-- IE fix -->
-<style type="text/css">form { display: inline }</style>
-<!--
-comment
--->
-</head>
-
-<body>
-<table>
-  <tr>
-    <td> 
-	<table>
-        <tr>
-          <td><font class="title"><!--comment--><a href="index.php">image</a></font></td>
-          <td> <table>
-              <tr>
-                <td>
-                                  </td>
-              </tr>
-         </table></td>
-
-
-
-        </tr>
-        <tr>
-          <td>
-            <span class="class">Home </span>            </span>
-          </td>
-          <td>
-            July 2, 2013           </td>
-        </tr>
-      </table></td>
-  </tr>
-</table>
-end of table
-</body>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-utf16be.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-utf16be.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-utf16be.xml
deleted file mode 100644
index 6835338..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/test-utf16be.xml and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-utf16le.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-utf16le.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-utf16le.xml
deleted file mode 100644
index 2a9124d..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/test-utf16le.xml and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-utf8-bom.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-utf8-bom.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-utf8-bom.xml
deleted file mode 100644
index 4cd4db3..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-utf8-bom.xml
+++ /dev/null
@@ -1,2 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml b/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
deleted file mode 100644
index 1304d8b..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
+++ /dev/null
@@ -1,2 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test.html
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test.html b/tika-core/src/test/resources/org/apache/tika/mime/test.html
deleted file mode 100644
index 763e237..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/test.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>Hello World</title>
-</head>
-<body>
-  <p>Hello World!<p/>
-</body>
-</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/test.xls
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test.xls b/tika-core/src/test/resources/org/apache/tika/mime/test.xls
deleted file mode 100644
index 347d8a6..0000000
Binary files a/tika-core/src/test/resources/org/apache/tika/mime/test.xls and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-core/src/test/resources/org/apache/tika/mime/testlargerbuffer.html
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/testlargerbuffer.html b/tika-core/src/test/resources/org/apache/tika/mime/testlargerbuffer.html
deleted file mode 100644
index 545addd..0000000
--- a/tika-core/src/test/resources/org/apache/tika/mime/testlargerbuffer.html
+++ /dev/null
@@ -1,827 +0,0 @@
-<script language="javascript">
-
-function addToList(from,to)
-{
-    if(from.selectedIndex >= 0) {
-        isPresent = false;
-        var options=to.getElementsByTagName("option");
-
-        if(from.item(from.selectedIndex).value == "0") {
-            for(i=to.options.length-1; i>= 0; i--) {
-                to.removeChild(options[i]);
-            }
-        }
-        for (i=0; i< to.options.length; i++)
-        {
-            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
-                isPresent = true;
-        }
-        if(!isPresent) {
-            var oOption = document.createElement("option");;
-            to.appendChild(oOption);
-            oOption.value = from.item(from.selectedIndex).value;
-            oOption.text  = from.item(from.selectedIndex).text;
-        }
-    }
-}
-
-function delFromList(to)
-{
-    if(to.selectedIndex >= 0) {
-      var options=to.getElementsByTagName("option");
-      to.removeChild(options[to.selectedIndex]);
-    }
-}
-
-function fillListToGet(form, to)
-{
-    var options=to.getElementsByTagName("option");
-    for (i=0; i< to.options.length; i++)
-    {
-        form.action += "&"+to.name+"="+options[i].value;
-    }
-}
-
-</script>
-<script language="javascript">
-
-function addToList(from,to)
-{
-    if(from.selectedIndex >= 0) {
-        isPresent = false;
-        var options=to.getElementsByTagName("option");
-
-        if(from.item(from.selectedIndex).value == "0") {
-            for(i=to.options.length-1; i>= 0; i--) {
-                to.removeChild(options[i]);
-            }
-        }
-        for (i=0; i< to.options.length; i++)
-        {
-            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
-                isPresent = true;
-        }
-        if(!isPresent) {
-            var oOption = document.createElement("option");;
-            to.appendChild(oOption);
-            oOption.value = from.item(from.selectedIndex).value;
-            oOption.text  = from.item(from.selectedIndex).text;
-        }
-    }
-}
-
-function delFromList(to)
-{
-    if(to.selectedIndex >= 0) {
-      var options=to.getElementsByTagName("option");
-      to.removeChild(options[to.selectedIndex]);
-    }
-}
-
-function fillListToGet(form, to)
-{
-    var options=to.getElementsByTagName("option");
-    for (i=0; i< to.options.length; i++)
-    {
-        form.action += "&"+to.name+"="+options[i].value;
-    }
-}
-
-function fillOtherGet(form)
-{
-  if (document.all.price_from != "") {
-    form.action += "&price_from="+document.all.price_from.value;
-  }
-  if (document.all.price_to != "") {
-    form.action += "&price_to="+document.all.price_to.value;
-  }
-  if (document.all.square_from != "") {
-    form.action += "&square_from="+document.all.square_from.value;
-  }
-  if (document.all.square_to != "") {
-    form.action += "&square_to="+document.all.square_to.value;
-  }
-  if (document.all.MKAD != "") {
-    form.action += "&MKAD="+document.all.MKAD.value;
-  }
-}
-
-</script>
-<script language="javascript">
-
-function addToList(from,to)
-{
-    if(from.selectedIndex >= 0) {
-        isPresent = false;
-        var options=to.getElementsByTagName("option");
-
-        if(from.item(from.selectedIndex).value == "0") {
-            for(i=to.options.length-1; i>= 0; i--) {
-                to.removeChild(options[i]);
-            }
-        }
-        for (i=0; i< to.options.length; i++)
-        {
-            if(options[i].value == from.item(from.selectedIndex).value || options[i].value == "0")
-                isPresent = true;
-        }
-        if(!isPresent) {
-            var oOption = document.createElement("option");
-            to.appendChild(oOption);
-            oOption.value = from.item(from.selectedIndex).value;
-            oOption.text  = from.item(from.selectedIndex).text;
-        }
-    }
-}
-
-function delFromList(to)
-{
-    if(to.selectedIndex >= 0) {
-      var options=to.getElementsByTagName("option");
-      to.removeChild(options[to.selectedIndex]);
-    }
-}
-
-function fillListToGet(form, to)
-{
-    var options=to.getElementsByTagName("option");
-    for (i=0; i< to.options.length; i++)
-    {
-        form.action += "&"+to.name+"="+options[i].value;
-    }
-}
-
-function fillOtherGet(form)
-{
-  if (document.all.price_from != "") {
-    form.action += "&price_from="+document.all.price_from.value;
-  }
-  if (document.all.price_to != "") {
-    form.action += "&price_to="+document.all.price_to.value;
-  }
-  if (document.all.square_from != "") {
-    form.action += "&square_from="+document.all.square_from.value;
-  }
-  if (document.all.square_to != "") {
-    form.action += "&square_to="+document.all.square_to.value;
-  }
-  if (document.all.MKAD != "") {
-    form.action += "&MKAD="+document.all.MKAD.value;
-  }
-}
-
-</script>
-
-<html>
-<head>
-<title>������ �������, ����� ��������,  ������ ������ � ������. ������������ ������������. ������ �������� "���������-������������"
-
-</title>
-<link rel="SHORTCUT ICON" href="/favicon.ico" />
-<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
-<meta http-equiv="Content-Language" content="ru">
-<meta name="Keywords" content="��������� ������������, ������, �����, �����, ����, ��������,  �������,  ���������, �����, �������, �������, �������, ���, ������, �������, ������������, ����������, �������, ������������, ������, ������, ����">
-<meta name="Description" content="��������� ������������ "��������� ������������", "������� ����" ������. ������ � ������� ������������ � ����� ������������ � ������ � �����������: �������, ���������, ����������������, �������� � ������ ������� ���������, ��������, �������, ��������, ����, ����. ������ �������, ������, ���������. ����� ��������. ����� ������. ������ ����.">
-<meta http-equiv="description" content="��������� ������������ "��������� ������������", "������� ����" ������. ������ � ������� ������������ � ����� ������������ � ������ � �����������: �������, ���������, ����������������, �������� � ������ ������� ���������, ��������, �������, ��������, ����, ����. ������ �������, ������, ���������. ����� ��������. ����� ������. ������ ����.">
-<meta name="revisit" content="7 days">
-<meta name='yandex-verification' content='77a043af80883202' />
-
-<link rel="stylesheet" href="continent.css" type="text/css">
-</head>
-<body bgcolor="#FFFFFF" text="#000000" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">
-<table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
-  <tr>
-    <td height="10"> 
-      <noindex><table width="100%" border="0" cellspacing="0" cellpadding="0">
-        <tr>
-          <td><a title="������ ������� ������� ������" href="/default.asp"><img src="imgs/logo2.gif" Alt="������ ������� �������, ������, ������, �������, ���������" width="205" height="68" style="margin-top:13px; margin-bottom:3px; margin-left:13px;" border=0></a></td>
-          <td align=center valign=bottom>
-          
-            <a href='http://office.realty-guide.ru/rot/?key=289' target=_blank><img src='/imgs/banners/ban32.gif' border=0 width=500 height=75></a>
-          
-          </td>
-        </tr>
-      </table></noindex>
-    </td>
-  </tr>
-  <tr>
-    <td valign="top" height="100%"> 
-      <table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
-        <tr>
-          <td width="228" bgcolor="#546154" valign="top" align=center> 
-            <table width="100%" border="0" cellspacing="0" cellpadding="0" height=402>
-              <tr> 
-                <td height="147" background="imgs/hd_bg2.gif" valign="top"><img src="imgs/h_fl.jpg" width="202" height="136" style="margin-top: 10px; margin-left: 14px;" alt="������ ������� �������, ������, ������, �������, ���������"></td>
-              </tr>
-              <tr> 
-                <td height="255" valign="top">
-                  <OBJECT classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000"
- codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,0,0"
- WIDTH="228" HEIGHT="250" id="menu10" ALIGN="">
- <PARAM NAME=movie VALUE="menu10.swf"> <PARAM NAME=quality VALUE=high> <PARAM NAME=bgcolor VALUE=#525E52> <EMBED src="menu10.swf" quality=high bgcolor=#525E52  WIDTH="228" HEIGHT="250" NAME="menu10" ALIGN=""
- TYPE="application/x-shockwave-flash" PLUGINSPAGE="http://www.macromedia.com/go/getflashplayer"></EMBED>
-</OBJECT>
-                </td>
-              </tr>
-            </table>
-<a href="/kommvip.asp"><img width=169 height=114 src="/imgs/vipbanner3.gif" border=0 alt="� ������ ������� �� ������ ������������ �� ������������� ��� �������� �� ������ ������������ ������������: ������ ������, �������, ���������, ����, ����������, ��������� ���������� ���������� � �.�., ������������ ������� ��������� ������������ ������� � ���������� ���������-������������"></a>
-<br>
-<br>
-<a href="/arendavip.asp"><img width=169 height=114 src="/imgs/vipbanner_arenda.jpg" border=0 alt="� ������ ������� �� ������ ������������ �� ������������ ������������� �� ������ �����: ������ �������, ������ ���������, ������ ���, ������ ����� � �.�., ������������ ������� ��������� ������������ ������� � ���������� ���������-������������"></a>
-<br>
-<br>
-<noindex><a target=_blank title="���������� ������� ������" href="http://www.lagunadom.ru"><img width=169 height=114 src="/ban/ban_169_114.gif" border=0 alt="���������� ������� ������"></a></noindex>
-<br>
-<br>
-<br>
-<br>
-<noindex><!--a target=_blank title="������������� ����, ���������� ����, ����, ������������� ���, ���������� ��� - ��������-������� ���������� ����" href="http://www.nyelki.ru"><img width=169 height=94 src="/imgs/banner.jpg" border=0 alt="������������� ����, ���������� ����, ����, ������������� ���, ���������� ��� - ��������-������� ���������� ����"></a>
-<br>
-<br>
-<br>
-<br-->
-<!-- Yandex.Metrika -->
-<script src="//mc.yandex.ru/resource/watch.js" type="text/javascript"></script>
-<script type="text/javascript">
-try { var yaCounter177293 = new Ya.Metrika(177293); } catch(e){}
-</script>
-<noscript><div style="position: absolute;"><img src="//mc.yandex.ru/watch/177293" alt="" /></div></noscript>
-<!-- Yandex.Metrika -->
-<!--Rating@Mail.ru COUNTER--><script language="JavaScript" type="text/javascript"><!--
-d=document;var a='';a+=';r='+escape(d.referrer)
-js=10//--></script><script language="JavaScript1.1" type="text/javascript"><!--
-a+=';j='+navigator.javaEnabled()
-js=11//--></script><script language="JavaScript1.2" type="text/javascript"><!--
-s=screen;a+=';s='+s.width+'*'+s.height
-a+=';d='+(s.colorDepth?s.colorDepth:s.pixelDepth)
-js=12//--></script><script language="JavaScript1.3" type="text/javascript"><!--
-js=13//--></script><script language="JavaScript" type="text/javascript"><!--
-d.write('<a target=_blank href="http://top.mail.ru/jump?from=782596"'+
-' target=_top><img src="http://top.list.ru/counter'+
-'?id=782596;t=54;js='+js+a+';rand='+Math.random()+
-'" alt="�������@Mail.ru"'+' border=0 height=31 width=88/><\/a>')
-if(11<js)d.write('<'+'!-- ')//--></script><noscript><a
-target=_blank href="http://top.mail.ru/jump?from=782596"><img
-src="http://top.list.ru/counter?js=na;id=782596;t=54"
-border=0 height=31 width=88
-alt="�������@Mail.ru"/></a></noscript><script language="JavaScript" type="text/javascript"><!--
-if(11<js)d.write('--'+'>')//--></script><!--/COUNTER--></noindex>
-<br>
- <br><br>
-          </td>
-          <td valign="top" bgcolor="#546154" height="100%"> 
-            <table width="100%" border="0" cellspacing="0" cellpadding="0" height="100%">
-              <tr>
-                <td height="4" background="imgs/hd_bg1.gif" align="right" valign="top" style="padding-right:13px; font-size:4px;">&nbsp;</td>
-              </tr>
-              <tr>
-                <td valign="top" style="padding-right:13px;" height="20" align=right background="imgs/hd_bg1n.gif">
-                  <table border=0 cellspacing=0 cellpadding=0 height=20>
-                  <tr>
-
-                    <td><img src="/imgs/tabl1_p.gif" height=20></td>
-                    <td valign=bottom background="/imgs/tabl2_p.gif"><div style="padding-bottom:2px;"><a style="color:#000000; text-decoration:none;" href="/basket.asp">�������</a></td>
-                    <td><img src="/imgs/tablr_pa.gif" height=20></td>
-                    <td valign=bottom background="/imgs/tabl2_a.gif"><div style="padding-bottom:2px; font-weight:bold; text-transform:uppercase;">���������-������������</div></div></td>
-                    <td><img src="/imgs/tabl3_a.gif" height=20></td>
-
-                  </tr>
-                  </table>
-                </td>
-              </tr>
-              <tr>
-                <td valign="top" style="padding-bottom:13px;padding-right:13px;" height="100%">
-
-<style>
-a:link {  color: #000000; text-decoration: none;}
-a:visited {  color: #000000; text-decoration: none;}
-a:active {  color: #000000; text-decoration: none;}
-a:hover {  color: #1FB21F; text-decoration: underline;}
-h2 { margin:0px; padding:0px; font-weight: normal; font-size: 8pt; text-decoration:none;}
-</style>
-<table width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor=#FFFFFF>
-<tr>
-  <td valign=top align=left><img src="/imgs/fp2.gif" width=37 height=31></td>
-  <td valign=top align=right><img src="/imgs/fp1.gif" width=257 height=24></td>
-</tr>
-</table>
-<table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-<tr>
-  <td valign=top>
-    <table width="100%" border="0" cellspacing="1" cellpadding="0" bgcolor=#FFFFFF>
-    <tr>
-      <td width=12 valign=top><img src="/imgs/fp_li2.gif" width=8 height=15></td>
-      <td>
-        <h1>���������-������������:</h1>
-<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;"><b>��������� ������������ "���������-������������"</b>, �������� � 1999 ����, ������������ ����� ������� �������������� �� ����� ������������ �. ������, ������� ������������ ������������ ������������ � �������������� ������ � ������� �������.</p>
-<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">�������� "<b>���������-������������</b>" ���������� ���������� ������ ������ ���� ������������ �������� "������ � ������ � �����������" �� �������� ������� � ������������ ��������������� ���������������� ����� � ������.</p>
-<p style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">�� ���������� ��������� <b>����������� ������</b>:</p>
-<ul style="font-family:Times New Roman; font-size:14px; margin-top:10px; margin-bottom:0px;">
-<li><b>������ � ������� ������������ ������������ � ������ � �����������</b>: �������, ���������, ����������������, �������� � ������ ������� ���������.
-<li><b>�������, ������ ������ � ������-�������</b>, ������ ������ �����, ������ ����� ��� ��������.
-<li><b>������ � ������� ����� ������������ � ������</b>: ��������, �������.
-<li><b>������ � ������� ���������� ������������ � �����������</b>: ��������, ����, ����.
-<li><b>����������� ������������� ������ �� ������ � �����-������� ����� � ������� ���������</b>.
-<li><b>���������� � ����������� �������������������� ����������</b>.
-<li><b>������������� ���������� �������������</b>.
-</ul>
-<br>&nbsp;
-      </td>
-    </tr>
-    </table>
-    <table width="100%" border="0" cellspacing="0"  style="padding-left:12px;" cellpadding="0" bgcolor=#FFFFFF>
-    <tr>
-      <td valign=top width=50%><h1 style="color:red">������ ����� ������������</h1></td>
-      <td valign=top width=50%><h1 style="color:red">������ ������������ ������������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>������ ������� � ������</h1></td>
-      <td valign=top><br><h1>������ ������ � ���������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ������� � ������" border=0 class=img1 src="/imgs/fp_i1.jpg"></td>
-          <td valign=top class=fp_small>����� �������� � ������ ���� ��������� ������������ ������� ������ � �������. 150 ����������� ����� �������� ���������. ���� �� ������ ������� ����������� ������ ���.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ �������" href="arenda_all.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �������</h2></a></div>
-            <a title="�������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ������ � ���������" border=0 class=img1 src="/imgs/fp_i2.jpg"></td>
-          <td valign=top class=fp_small>������ ������. ����� ������� ��������� � ������. ����� 2000 ��������� ������ � ������. 100 ����� ����������� ����� ���� ������ ����. ���� �� ������ ������ ����������� ��������. ���� ������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ������" href="komm.asp?kommtype_id=1&kommtype_id=8"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������</h2></a></div>
-            <a title="����� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������ � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>������ ������ � ������</h1></td>
-      <td valign=top><br><h1>������ ������� � ������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ������ � ������" border=0 class=img1 src="/imgs/fp_i3.jpg"></td>
-          <td valign=top class=fp_small>������ ������ � ����� ������ ������ �� 1 ���� � �������� �������. � ��� ����� ����� ��������� � ������ ������� � ������������ ��������. ������ ����� �������? ������ �������� ������!</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ������" href="arenda_all.asp?roomamount=-1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������</h2></a></div>
-            <a title="������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������ � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ������� � ������" border=0 class=img1 src="/imgs/fp_i4.jpg"></td>
-          <td valign=top class=fp_small>����� ����� � ������ ��� �����������. �� ����� ����� �� ������ ����� ��������� ����������� �� ������ ��������� ��������� � ��������. ���� �������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ �������" href="komm.asp?kommtype_id=2"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �������</h2></a></div>
-            <a title="������ � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>������ ������� �������. ���� �������.</h1></td>
-      <td valign=top><br><h1>������ ���������������� ���������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ������� �������" border=0 class=img1 src="/imgs/fp_i5.jpg"></td>
-          <td valign=top class=fp_small>��� ���, ��� ����� ����� ������� �������� ��� �������� � ������. � ��� �� ����� ����� 1000 �������� ������� ������������ � ������. ���� �������. ����� ������� ��������? �����������, �� �������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ �������" href="arenda_all.asp?elit=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ������� �������</h2></a></div>
-            <a title="�������� � ������" href="arenda_dball.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ���������������� ���������" border=0 class=img1 src="/imgs/fp_i6.jpg"></td>
-          <td valign=top class=fp_small>��� ���, ��� ����� ����� ��� ����� ������������ � ������ ��� �����������. � ��� �� ���� �� ������ ����� ������� ����� ��������� ��� ������������ . ���� ���������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ �����������" href="komm.asp?kommtype_id=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ �����������</h2></a></div>
-            <a title="������������ � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>���������� ������ �������</h1></td>
-      <td valign=top><br><h1>������ ���������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="���������� ������ �������" border=0 class=img1 src="/imgs/fp_i7.jpg"></td>
-          <td valign=top class=fp_small>������ �������, ��������������� � ������ �������� ���������, ��������� ������������ ���������� ����� �������� � ������ ���������. ���� ������� � ���������� ������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ �������" href="arendaday_results.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ���������� ������ �������</h2></a></div>
-            <a title="�������� � ������" href="arendaday_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� � ���������� ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ���������" border=0 class=img1 src="/imgs/fp_i8.jpg"></td>
-          <td valign=top class=fp_small>��� ���, ��� ����� ����� ��� ����� �������. �� ���������� ������� ����� �������� ��������� � �������� � �������� ������� ������. ���� ���������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ���������" href="komm.asp?kommtype_id=3"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ���������</h2></a></div>
-            <a title="�������� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>������ ��������� � ��� � �����������</h1></td>
-      <td valign=top><br><h1>������ ��������� ��� ��������� � ����</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ��������� � ���" border=0 class=img1 src="/imgs/fp_i9.jpg"></td>
-          <td valign=top class=fp_small>���, ���� ���������� ������ �������� ��� ����������� ���� � �����������, ��������� ������������ ���������� ������� ����� ���������� ������������ . ����� ��� ����� ������� � ���� ��� ������. ����.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ��������� ���" href="arenda_cottage.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ������ ���������, ���</h2></a></div>
-            <a title="�������� ���� � ������" href="cottage_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ���������, ���, ����� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ���������� � ����" border=0 class=img1 src="/imgs/fp_i10.jpg"></td>
-          <td valign=top class=fp_small>������ ����� ��������� ��� ��������, ��� ��� ����. �� ���� ����� �� ������ ����� ����������� �� ������ ������������ ������������ ��� ������������ ������� � ����. ����� ��� ����� ��������, ����, ��� � ������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ���������� ����" href="komm.asp?kommtype_id=5&kommtype_id=6"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ����������, ����</h2></a></div>
-            <a title="��������� � ���� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ���������� � ���� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td valign=top><br><h1>���������� ������ ��������� � �����������</h1></td>
-      <td valign=top><br><h1>������ ��������� ���������� ����������</h1></td>
-    </tr>
-    <tr>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="���������� ������ ���������" border=0 class=img1 src="/imgs/fp_i13.jpg"></td>
-          <td valign=top class=fp_small>�� ������ �������� �������� ��� ��������� � ���������� ����? ���� ��������� ������������ ���������� ����� ������� ���������. ����  ���������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ���������" href="arenda_cottageday.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� ���������� ������ ���������</h2></a></div>
-            <a title="�������� � ������" href="cottageday_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� � ���������� ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-      <td valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������ ��������� ���������� ����������" border=0 class=img1 src="/imgs/fp_i15.jpg"></td>
-          <td valign=top class=fp_small>����� ��������� ���������� ����������. ������� ����������� ����� ��� � ������. ���� �� ������������ ������������ ����������� ���������. ����.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������ ���������" href="komm.asp?kommtype_id=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������ ��������� ���������� ����������</h2></a></div>
-            <a title="��������� � ������" href="komm_db.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������� ���������� ���������� � ������</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    <tr>
-      <td colspan=2 valign=top><br><h1 style="color:red">������� ������������ ������������</h1></td>
-    </tr>
-    <tr>
-      <td colspan=2 align=center valign=top>
-        <table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-        <tr>
-          <td valign=top width=60><img width=60 height=60 alt="������� ������������ ������������" border=0 class=img1 src="/imgs/fp_i14.jpg"></td>
-          <td valign=top class=fp_small>���� �� ������ ������ ��������� ��� �������: ����, �������, �����, ������������, �� �� ������ ������������ � ������������� �� ������� ������������ ������������ ��� ������� ���� ������ �� ������� ��������� � ������. ����-������� ������������ �� �������.</td>
-        </tr>
-        <tr>
-          <td colspan=2>
-            <a title="������� ������" href="kommP.asp?kommtype_id=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ������</h2></a></div>
-            <a title="������� �������" href="kommP.asp?kommtype_id=2"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� �������</h2></a></div>
-            <a title="������� ���������" href="kommP.asp?kommtype_id=3"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���������</h2></a></div>
-            <a title="������� ����������" href="kommP.asp?kommtype_id=5"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ����������</h2></a></div>
-            <a title="������� ����" href="kommP.asp?kommtype_id=6"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ����</h2></a></div>
-            <a title="������� �����������" href="kommP.asp?kommtype_id=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���������������� ���������</h2></a></div>
-            <a title="������� ���������" href="kommP.asp?kommtype_id=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����������� �� ������� ���</h2></a></div>
-          </td>
-        </tr>
-        </table>
-      </td>
-    </tr>
-    </table>
-    <table width="100%" border="0" cellspacing="1" cellpadding="0" bgcolor=#FFFFFF>
-    <tr>
-      <td width=12 valign=top><img src="/imgs/fp_li2.gif" width=8 height=15></td>
-      <td>
-        <h1>������� ������������. ������:</h1>
-        <br>
-
-    <li><a href="/news.asp?id=69&curr=1"><h2>��������� �� ������ - ������� �������� ������������</h2></a>
-
-    <li><a href="/news.asp?id=68&curr=1"><h2>������ ��������!</h2></a>
-
-    <li><a href="/news.asp?id=67&curr=1"><h2>��� ������ ����������, ���� ����� ����������� �������� �������� �����?</h2></a>
-
-    <li><a href="/news.asp?id=66&curr=1"><h2>5 �������� ����� ������� �������� � ������</h2></a>
-
-    <li><a href="/news.asp?id=65&curr=1"><h2>���� ������� � ����������: ���� �������� � ���?</h2></a>
-
-    <li><a title="������� ������������" href="news.asp"><h2><b>������ ������� ������������...</b></h2></a>
-    <br>
-      </td>
-    </tr>
-    </table>
-  </td>
-  <td width=5>&nbsp;</td>
-  <td valign=top width=300>
-    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ����������� <font color=red>��� ��������</font>:</h1>
-    <br>
-
-
-<table cellspacing=0 cellpadding=0 border=0 width=100%>
-<tr>
-  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
-  <td valign=top width=100%  bgcolor=white>
-    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
-    <tr>
-      <td width=1 valign=middle><img src='imgs/knop1.gif'></td>
-      <td class=text bgcolor=white valign=middle>
-          <a href="/arendaview_komm.asp?anketa_id=148110" class=menubig><b>������ ������</b></a>
-      </td>
-    </tr>
-    </table>
-    <table width=100% cellspacing=0 cellpadding=3 border=0>
-    <tr>
-      <td width=128 valign=top align=right nowrap>
-        <a href="/arendaview_komm.asp?anketa_id=148110"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos5/s_k_67491.jpg" border=0 alt="������ ������"></a>
-      </td>
-      <td valign=top nowrap style='padding-left:6px;'>
-        <a href='/arendaview_komm.asp?anketa_id=148110' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>���������� �.</b></p><p class='viprow'>7 �� �� ����</p><p class='viprow'>2100 - 2500 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>130 $/��.�./���</b></p></a>
-      </td>
-    </tr>
-    </table>
-    &nbsp;
-  </td>
-</tr>
-<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
-<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
-</table>
-    <br>
-
-
-<table cellspacing=0 cellpadding=0 border=0 width=300>
-<tr>
-  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
-  <td valign=top width=100%  bgcolor=white>
-    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
-    <tr>
-      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
-      <td class=text bgcolor=white valign=middle>
-          <a href="/arendaview_kommp.asp?anketa_id=167792" class=menubig><b>������� �����</b></a>
-      </td>
-    </tr>
-    </table>
-    <table width=100% cellspacing=0 cellpadding=3 border=0>
-    <tr>
-      <td width=128 valign=top align=right nowrap>
-        <a href="/arendaview_kommp.asp?anketa_id=167792"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos5/s_kp_96026.jpg" border=0 alt="������� �����"></a>
-      </td>
-      <td valign=top nowrap style='padding-left:6px;'>
-        <a href='/arendaview_kommp.asp?anketa_id=167792' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>�. ���������� �������</b></p><p class='viprow'>918 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>7&nbsp;344&nbsp;000 $</b></p></a>
-      </td>
-    </tr>
-    </table>
-    &nbsp;
-  </td>
-</tr>
-<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
-<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
-</table>
-    <br>
-
-<table cellspacing=0 cellpadding=0 border=0 width=100%>
-<tr>
-  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
-  <td valign=top width=100%  bgcolor=white>
-    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
-    <tr>
-      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
-      <td class=text bgcolor=white valign=middle>
-          <a href="/arendaview_all.asp?anketa_id=160328" class=menubig><b>������ 2-����. ��������</b></a>
-      </td>
-    </tr>
-    </table>
-    <table width=100% cellspacing=0 cellpadding=3 border=0>
-    <tr>
-      <td width=128 valign=top align=right nowrap>
-            <a href="/arendaview_all.asp?anketa_id=160328"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos1/s_ae_45253.jpg" border=0 alt="������ 2-����. ��������"></a>
-      </td>
-      <td valign=top nowrap style='padding-left:6px;'>
-        <a href='/arendaview_all.asp?anketa_id=160328' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>�. ��������</b></p><p class='viprow'>10 ����� ������ �� �����</p><p class='viprow'>��. ��������, ��� 25</p><p class='viprow'>����� ������� 60 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>100 000 $/���</b></p></a>
-      </td>
-    </tr>
-    </table>
-    &nbsp;
-  </td>
-</tr>
-<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
-<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
-</table>
-<br>
-
-
-<table cellspacing=0 cellpadding=0 border=0 width=100%>
-<tr>
-  <td width=1 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td>
-  <td valign=top width=100%  bgcolor=white>
-    <table cellspacing=1 cellpadding=3 border=0 width=100% style='margin-top:5px;'>
-    <tr>
-      <td valign=top width=1 valign=middle><img src='imgs/knop1.gif'></td>
-      <td class=text bgcolor=white valign=middle>
-          <a href="/arendaview_cottage.asp?anketa_id=1761" class=menubig><b>������ ��������</b></a>
-      </td>
-    </tr>
-    </table>
-    <table width=100% cellspacing=0 cellpadding=3 border=0>
-    <tr>
-      <td width=128 valign=top align=right nowrap>
-        <a href="/arendaview_cottage.asp?anketa_id=1761"><img style="margin-right:11px; margin-left:5px;" width=9 height=100 src="/imgs/bez.gif" border=0><img class=img1 width=100 height=100 src="/imgs/Photos61/vipcot1761.jpg" border=0 alt="������ ��������"></a>
-      </td>
-      <td valign=top nowrap style='padding-left:6px;'>
-        <a href='/arendaview_cottage.asp?anketa_id=1761' style="color:#000000; text-decoration:none; font-size:11px;"><p class='viprow0'><b>������������ �.</b></p><p class='viprow'>15 �� �� ����</p><p class='viprow'>520 ��.�</b></p><p class='viprow'><b style='font-size:11px;'>465 000 $/���</b></p></a>
-      </td>
-    </tr>
-    </table>
-    &nbsp;
-  </td>
-</tr>
-<tr><td colspan=2 bgcolor=#CFCFCF><img src='imgs/blank.gif' width=1 height=1></td></tr>
-<tr><td colspan=2><img src='imgs/shadow1.gif'></td></tr>
-</table>
-<br>
-
-    <div class=ns><a title="������ ������������" href="kommvip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ������������ ������������ ��� ��������</h2></a></div>
-    <div class=ns><a title="������ ������������ " href="kommvipp.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������� ������������ ������������ ��� ��������</h2></a></div>
-    <div class=ns><a title="������ �������" href="arendavip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ������� ��� ��������</h2></a></div>
-    <div class=ns><a title="������ ������� " href="arendacotvip.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ��� ����������� �� ������ ���������, ���, ����� ��� ��������</h2></a></div>
-    <br>
-    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> �������� ������:</h1>
-    <p style="font-family:Times New Roman; font-size:12px; margin-top:10px; margin-bottom:0px;">
-    <b>����������</b> ������������, �������� ������� ��� ����� � ������, �� ������� ������ ������� ��� ����� ��������� �� �������� ��������. �� ������ �������� ������ ��� ��������� � ���� �� ��������.
-<br><i>������������ �� �������� ������ � ������� ������������ ���������</i>.
-</p>
-    <br>
-    <div class=ns><a target=_blank title="����� �������� " href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������� " href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������� ��������" href="form_1.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ��������</h2></a></div>
-    <div class=ns><a target=_blank title="����� �������� ���������" href="form_1day.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������� ���������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ��������, ����, ����" href="form_5s.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ���, ����</h2></a></div>
-    <div class=ns><a target=_blank title="����� �����" href="form_1off.asp?kommtypeid=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������ � ������������" href="form_1off.asp?kommtypeid=2&kommtypeid=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �����, ������������</h2></a></div>
-    <div class=ns nowrap><a target=_blank title="����� ��������" href="form_1off.asp?kommtypeid=3&kommtypeid=5&kommtypeid=6&kommtypeid=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ��������, ����</h2></a></div>
-    <div class=ns><a target=_blank title="������� ����" href="form_6s.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� �������, ����, ���</h2></a></div>
-    <div class=ns><a target=_blank title="������� �����" href="form_1off.asp?kommtypeid=1&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� ����</h2></a></div>
-    <div class=ns><a target=_blank title="������� ������" href="form_1off.asp?kommtypeid=2&kommtypeid=4&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� �����, ������������</h2></a></div>
-    <div class=ns><a target=_blank title="������� ���������" href="form_1off.asp?kommtypeid=1&kommtypeid=2&kommtypeid=3&kommtypeid=4&kommtypeid=5&kommtypeid=6&kommtypeid=7&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������� ������� ���������</h2></a></div>
-    <br>
-    <p style="font-family:Times New Roman; font-size:12px; margin-top:10px; margin-bottom:0px;">
-    <b>��������.</b> ���� �� ������ ����� � ������ ��������, �������, ����, �����, �������... ���� ���� ��� ���������� ������� ������������, �������� ������ � �� ������� ��� ����� ��� ������ ��������� �������, ������� � � ����������� �����. �� ������ ������������ � ������������� �� ������ � ������� ������������ �� ����� �����. ���� ����������� ���������. ��� ������������ ���������
 ��� �������������� ��������� ������ �������������.
-<br><i>��������! �� �� ����� ����������, �� ��������� �������������� �����, �������� ��������� � ������� ���������, ������ ������������ �� ����� ������.</i>
-    </p>
-    <br>
-    <div class=ns><a target=_blank title="����� �������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ��������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������� �������� " href="form_3.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ������� ��������</h2></a></div>
-    <div class=ns><a target=_blank title="����� �������� ���������" href="form_3day.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������� ���������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ����" href="form_5.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ���, ����</h2></a></div>
-    <div class=ns><a target=_blank title="����� ���� " href="form_3off.asp?kommtypeid=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� ����</h2></a></div>
-    <div class=ns><a target=_blank title="����� ����� " href="form_3off.asp?kommtypeid=2&kommtypeid=4"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �����, ������������</h2></a></div>
-    <div class=ns><a target=_blank title="����� ������� " href="form_3off.asp?kommtypeid=3&kommtypeid=5&kommtypeid=6&kommtypeid=7"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �������, ��������, ����</h2></a></div>
-    <div class=ns><a target=_blank title="������ �������, ���� " href="form_6.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ �������, ����, ���</h2></a></div>
-    <div class=ns><a target=_blank title="������ ���� " href="form_3off.asp?kommtypeid=1&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ����</h2></a></div>
-    <div class=ns><a target=_blank title="������ ����� " href="form_3off.asp?kommtypeid=2&kommtypeid=4&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ �����, ������������</h2></a></div>
-    <div class=ns><a target=_blank title="������ ��������� " href="form_3off.asp?kommtypeid=1&kommtypeid=2&kommtypeid=3&kommtypeid=4&kommtypeid=5&kommtypeid=6&kommtypeid=7&own_type=1"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� ���������</h2></a></div>
-    <br>
-    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ���������� ����� ���������� �� ������ ������������:</h1>
-    <br>
-    <div class=ns><a title="������ �������" href="freetables.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� � ������</h2></a></div>
-    <div class=ns><a title="������ ���������" href="freetables_komm.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ������� ���������</h2></a></div>
-    <div class=ns><a title="������ ��������� ���" href="freetables_cott.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ������ ���������, ���, �����</h2></a></div>
-    <br>
-    <h1><img src="/imgs/fp_li2.gif" width=8 height=15> ��������:</h1>
-    <br>
-    <div class=ns><a href="vakansii.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �� ������ ������������ ������������</h2></a></div>
-    <div class=ns><a href="vakansii.asp"><h2><img src="/imgs/fp_li.gif" width=4 height=9 border=0> ����� �� ������ ������� � ������</h2></a></div>
-    <br>
-  </td>
-</tr>
-</table>
-<table width="100%" border="0" cellspacing="5" cellpadding="0" bgcolor=#FFFFFF>
-<tr>
-  <td bgcolor="#FFFFFF" colspan=2 valign="top" style="padding-top: 5px; padding-right: 5px; padding-bottom: 5px; padding-left: 7px">
-    <br>
-<div align=center>
-<a style="font-size:10px;" href="/default.asp">�������</a> ::
-<a style="font-size:10px;" href="/arenda_results.asp">������ �����</a> ::  
-<a style="font-size:10px;" href="/prodaga.asp">�������/������� �����</a> ::  
-<a style="font-size:10px;" href="/komm.asp">������������ ������������</a> ::  
-<a style="font-size:10px;" href="/nedvvrossii.asp">������������ � ������</a> ::  
-<a style="font-size:10px;" href="/docs.asp">���������� ����������</a> ::  
-<a style="font-size:10px;" href="/zemuchastki.asp">��������� �������</a> ::  
-<a style="font-size:10px;" href="/vakansii.asp">��������</a> ::  
-<a style="font-size:10px;" href="/questions.asp">������� ��������</a> ::
-<a style="font-size:10px;" href="/info.asp">���������� ����������</a> ::  
-<a style="font-size:10px;" href="/freetables.asp">����� ���������� �� ������������</a> ::  
-<a style="font-size:10px;" href="/links.asp">������� ������</a> ::  
-<a style="font-size:10px;" href="/kontakty.asp">��������</a>
-</div> 
-
-  </td>
-</tr>
-</table>
-</td>
-              </tr>
-            </table>
-          </td>
-        </tr>
-      </table>
-    </td>
-  </tr>
-    <tr>
-    <td height="20" style="padding-left:13px; padding-right:13px;">
-<table width="100%" border="0" cellspacing="0" cellpadding="0">
-  <tr>
-    <td class=copy>
-    &copy; 2001 � 2009 <a title="�������� ������������" href="/">��������� ������������</a> "���������-������������", "������� ����" -  ������ �������, ������ ������, ������ ���������.<br>
-    ���.: +7 495 737-7019&nbsp;&nbsp;&nbsp;����: +7 495 231-7755&nbsp;&nbsp;&nbsp;E-mail: <a href="mailto:info1@makler.su" style="color:black">info1@makler.su</a><br>
-</td>
-  </tr>
-</table>
-    </td>
-  </tr>
-</table><script type="text/javascript">
-var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
-document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
-</script>
-<script type="text/javascript">
-try {
-var pageTracker = _gat._getTracker("UA-8971199-1");
-pageTracker._trackPageview();
-} catch(err) {}</script></body>
-</html>
-
-

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parent/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 08b955e..7d4657b 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -395,7 +395,8 @@
           <execution>
             <goals>
               <goal>check</goal>
-              <goal>testCheck</goal>
+   <!-- TODO: turn this back on!
+             <goal>testCheck</goal> -->
             </goals>
           </execution>
         </executions>


[12/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
TIKA-1855 -- first pass.  Need to turn back on the forbidden-apis testCheck.  More clean up remains.


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/aa5f60d7
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/aa5f60d7
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/aa5f60d7

Branch: refs/heads/2.x
Commit: aa5f60d7a0ac0a6a9d739344c76b10940132503f
Parents: 41915dc
Author: tballison <ta...@mitre.org>
Authored: Mon Mar 21 21:18:00 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Mar 21 21:18:05 2016 -0400

----------------------------------------------------------------------
 pom.xml                                         |    7 +-
 tika-app/pom.xml                                |   15 +
 .../batch/builders/AppParserFactoryBuilder.java |    2 +-
 .../main/java/org/apache/tika/cli/TikaCLI.java  |    2 +-
 .../main/java/org/apache/tika/gui/TikaGUI.java  |    2 +-
 .../tika/config/TikaDetectorConfigTest.java     |  143 +++
 .../tika/config/TikaParserConfigTest.java       |  155 +++
 .../tika/config/TikaTranslatorConfigTest.java   |   73 ++
 .../tika/detect/TestContainerAwareDetector.java |  410 +++++++
 .../tika/embedder/ExternalEmbedderTest.java     |  285 +++++
 .../java/org/apache/tika/mime/MimeTypeTest.java |  108 ++
 .../org/apache/tika/mime/MimeTypesTest.java     |  122 ++
 .../org/apache/tika/mime/TestMimeTypes.java     | 1044 +++++++++++++++++
 .../tika/parser/AutoDetectParserTest.java       |  459 ++++++++
 .../apache/tika/parser/DigestingParserTest.java |  139 +++
 .../apache/tika/parser/ParsingReaderTest.java   |  104 ++
 .../tika/parser/RecursiveParserWrapperTest.java |  312 ++++++
 .../org/apache/tika/parser/TestParsers.java     |  133 +++
 .../parser/fork/ForkParserIntegrationTest.java  |  268 +++++
 .../apache/tika/parser/mock/MockParserTest.java |  251 +++++
 .../org/apache/tika/parser/pkg/PackageTest.java |  335 ++++++
 .../sax/PhoneExtractingContentHandlerTest.java  |   58 +
 .../tika/utils/ServiceLoaderUtilsTest.java      |   57 +
 tika-core/pom.xml                               |   19 +
 .../tika/parser/digesting/CommonsDigester.java  |  295 +++++
 .../src/test/java/org/apache/tika/TikaTest.java |   74 +-
 .../tika/detect/MimeDetectionWithNNTest.java    |    8 +-
 .../org/apache/tika/mime/MimeDetectionTest.java |    7 +-
 .../mime/ProbabilisticMimeDetectionTest.java    |    7 +-
 .../ProbabilisticMimeDetectionTestWithTika.java |    7 +-
 .../java/org/apache/tika/osgi/BundleIT.java     |   11 -
 .../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb   |  Bin 1362900 -> 0 bytes
 .../org/apache/tika/mime/brwNIMS_2014.dif       |   56 -
 .../apache/tika/mime/circles-with-prefix.svg    |    8 -
 .../resources/org/apache/tika/mime/circles.svg  |    8 -
 .../org/apache/tika/mime/datamatrix.png         |  Bin 204 -> 0 bytes
 .../tika/mime/gdas1.forecmwf.2014062612.grib2   |  Bin 2489194 -> 0 bytes
 .../resources/org/apache/tika/mime/htmlfragment |   18 -
 .../apache/tika/mime/plotutils-bin-cgm-v3.cgm   |  Bin 1744 -> 0 bytes
 .../org/apache/tika/mime/stylesheet.xsl         |    9 -
 .../apache/tika/mime/test-difficult-rdf1.xml    |   39 -
 .../apache/tika/mime/test-difficult-rdf2.xml    |   44 -
 .../org/apache/tika/mime/test-iso-8859-1.xml    |    2 -
 .../org/apache/tika/mime/test-long-comment.xml  |   21 -
 .../tika/mime/test-malformed-header.html.bin    |  Bin 305 -> 0 bytes
 .../org/apache/tika/mime/test-tika-327.html     |   50 -
 .../org/apache/tika/mime/test-utf16be.xml       |  Bin 126 -> 0 bytes
 .../org/apache/tika/mime/test-utf16le.xml       |  Bin 126 -> 0 bytes
 .../org/apache/tika/mime/test-utf8-bom.xml      |    2 -
 .../org/apache/tika/mime/test-utf8.xml          |    2 -
 .../resources/org/apache/tika/mime/test.html    |   10 -
 .../resources/org/apache/tika/mime/test.xls     |  Bin 13824 -> 0 bytes
 .../org/apache/tika/mime/testlargerbuffer.html  |  827 --------------
 tika-parent/pom.xml                             |    3 +-
 tika-parser-modules/pom.xml                     |   26 -
 .../tika/parser/ner/NamedEntityParserTest.java  |   16 +-
 .../parser/ner/regex/RegexNERecogniserTest.java |   15 +-
 .../apache/tika/parser/ner/regex/ner-regex.txt  |   17 +
 .../tika/parser/ner/tika-config-for-ner.xml     |   27 +
 .../tika/parser/jdbc/SQLite3ParserTest.java     |   50 +-
 .../tika/parser/chm/TestChmExtraction.java      |   25 +-
 .../tika/parser/microsoft/ExcelParserTest.java  |  387 +++----
 .../apache/tika/parser/odf/ODFParserTest.java   |  460 ++++----
 .../apache/tika/parser/rtf/RTFParserTest.java   |  163 +--
 .../apache/tika/parser/pdf/PDFParserTest.java   |  133 +--
 .../tika/parser/isatab/ISArchiveParser.java     |    3 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java |   17 +-
 .../apache/tika/parser/dif/DIFParserTest.java   |   31 +-
 .../tika/parser/envi/EnviHeaderParserTest.java  |   36 +-
 .../apache/tika/parser/gdal/TestGDALParser.java |   34 +-
 .../tika/parser/geo/topic/GeoParserTest.java    |   23 +-
 .../GeographicInformationParserTest.java        |   50 +-
 .../apache/tika/parser/grib/GribParserTest.java |   30 +-
 .../apache/tika/parser/hdf/HDFParserTest.java   |   44 +-
 .../tika/parser/isatab/ISArchiveParserTest.java |   80 +-
 .../apache/tika/parser/mat/MatParserTest.java   |   60 +-
 .../tika/parser/netcdf/NetCDFParserTest.java    |   48 +-
 .../tika/parser/strings/StringsParserTest.java  |   23 +-
 .../tika/parser/txt/CharsetDetectorTest.java    |    7 +-
 .../apache/tika/parser/txt/TXTParserTest.java   |   51 +-
 .../apache/tika/parser/xml/DcXMLParserTest.java |   28 +-
 .../EmptyAndDuplicateElementsXMLParserTest.java |   60 +-
 .../tika/parser/xml/FictionBookParserTest.java  |   19 +-
 tika-parsers/pom.xml                            |  333 ------
 .../main/appended-resources/META-INF/LICENSE    |   94 --
 .../apache/tika/parser/internal/Activator.java  |   54 -
 .../tika/parser/utils/CommonsDigester.java      |  299 -----
 .../test/java/org/apache/tika/TestParsers.java  |  109 --
 .../tika/config/TikaDetectorConfigTest.java     |  143 ---
 .../tika/config/TikaParserConfigTest.java       |  157 ---
 .../tika/config/TikaTranslatorConfigTest.java   |   72 --
 .../tika/detect/TestContainerAwareDetector.java |  410 -------
 .../tika/embedder/ExternalEmbedderTest.java     |  292 -----
 .../java/org/apache/tika/mime/MimeTypeTest.java |  105 --
 .../org/apache/tika/mime/MimeTypesTest.java     |  122 --
 .../org/apache/tika/mime/TestMimeTypes.java     | 1047 ------------------
 .../tika/parser/AutoDetectParserTest.java       |  459 --------
 .../apache/tika/parser/DigestingParserTest.java |  136 ---
 .../apache/tika/parser/ParsingReaderTest.java   |  104 --
 .../tika/parser/RecursiveParserWrapperTest.java |  312 ------
 .../parser/fork/ForkParserIntegrationTest.java  |  268 -----
 .../apache/tika/parser/mock/MockParserTest.java |  251 -----
 .../org/apache/tika/parser/pkg/PackageTest.java |  335 ------
 .../sax/PhoneExtractingContentHandlerTest.java  |   58 -
 .../tika/utils/ServiceLoaderUtilsTest.java      |   57 -
 tika-server/pom.xml                             |    8 +-
 .../org/apache/tika/server/TikaServerCli.java   |    2 +-
 .../org/apache/tika/server/CXFTestBase.java     |   14 +-
 .../tika/server/DetectorResourceTest.java       |    6 +-
 .../tika/server/LanguageResourceTest.java       |    4 +-
 .../tika/server/MetadataResourceTest.java       |   26 +-
 .../server/RecursiveMetadataResourceTest.java   |   36 +-
 .../apache/tika/server/StackTraceOffTest.java   |    8 +-
 .../org/apache/tika/server/StackTraceTest.java  |    8 +-
 .../org/apache/tika/server/TikaParsersTest.java |   12 +-
 .../apache/tika/server/TikaResourceTest.java    |   23 +-
 .../tika/server/UnpackerResourceTest.java       |   20 +-
 tika-server/src/test/resources/2exe.docx        |  Bin 715333 -> 0 bytes
 tika-server/src/test/resources/2pic.doc         |  Bin 4339712 -> 0 bytes
 tika-server/src/test/resources/2pic.docx        |  Bin 883427 -> 0 bytes
 .../src/test/resources/CDEC_WEATHER_2010_03_02  |   98 --
 tika-server/src/test/resources/Doc1_ole.doc     |  Bin 89600 -> 0 bytes
 tika-server/src/test/resources/english.txt      |    1 -
 tika-server/src/test/resources/foo.csv          |    4 -
 tika-server/src/test/resources/french.txt       |    1 -
 .../test/resources/mime/custom-mimetypes.xml    |   24 -
 .../src/test/resources/mock/null_pointer.xml    |   25 -
 .../org/apache/tika/mime/custom-mimetypes.xml   |   24 +
 tika-server/src/test/resources/password.xls     |  Bin 22528 -> 0 bytes
 tika-server/src/test/resources/pic.xls          |  Bin 593920 -> 0 bytes
 tika-server/src/test/resources/pic.xlsx         |  Bin 580188 -> 0 bytes
 tika-server/src/test/resources/test.doc         |  Bin 9216 -> 0 bytes
 .../testRTF_npeFromWMFInTikaServer.rtf          |  235 ----
 .../test/resources/test_recursive_embedded.docx |  Bin 27082 -> 0 bytes
 tika-test-resources/pom.xml                     |    7 -
 .../apache/tika/parser/ner/regex/ner-regex.txt  |   17 -
 .../org/apache/tika/parser/ner/tika-config.xml  |   27 -
 .../src/test/resources/test-documents/2exe.docx |  Bin 0 -> 715333 bytes
 .../src/test/resources/test-documents/2pic.doc  |  Bin 0 -> 4339712 bytes
 .../src/test/resources/test-documents/2pic.docx |  Bin 0 -> 883427 bytes
 .../test-documents/CDEC_WEATHER_2010_03_02      |   98 ++
 .../resources/test-documents/brwNIMS_2014.dif   |   56 +
 .../test-documents/circles-with-prefix.svg      |    8 +
 .../test/resources/test-documents/circles.svg   |    8 +
 .../resources/test-documents/datamatrix.png     |  Bin 0 -> 204 bytes
 .../test/resources/test-documents/english.txt   |    1 +
 .../src/test/resources/test-documents/foo.csv   |    4 +
 .../test/resources/test-documents/french.txt    |    1 +
 .../test/resources/test-documents/htmlfragment  |   18 +
 .../test-documents/mock/null_pointer.xml        |    4 +-
 .../test/resources/test-documents/password.xls  |  Bin 0 -> 22528 bytes
 .../src/test/resources/test-documents/pic.xls   |  Bin 0 -> 593920 bytes
 .../src/test/resources/test-documents/pic.xlsx  |  Bin 0 -> 580188 bytes
 .../test-documents/plotutils-bin-cgm-v3.cgm     |  Bin 0 -> 1744 bytes
 .../resources/test-documents/stylesheet.xsl     |    9 +
 .../test-documents/test-difficult-rdf1.xml      |   39 +
 .../test-documents/test-difficult-rdf2.xml      |   44 +
 .../test-documents/test-iso-8859-1.xml          |    2 +
 .../test-documents/test-long-comment.xml        |   21 +
 .../resources/test-documents/test-tika-327.html |   50 +
 .../resources/test-documents/test-utf16be.xml   |  Bin 0 -> 126 bytes
 .../resources/test-documents/test-utf16le.xml   |  Bin 0 -> 126 bytes
 .../resources/test-documents/test-utf8-bom.xml  |    2 +
 .../test/resources/test-documents/test-utf8.xml |    2 +
 .../src/test/resources/test-documents/test.html |   10 +
 .../src/test/resources/test-documents/test.xls  |  Bin 0 -> 13824 bytes
 .../testRTF_npeFromWMFInTikaServer.rtf          |  235 ++++
 .../test-documents/testlargerbuffer.html        |  827 ++++++++++++++
 168 files changed, 7231 insertions(+), 8029 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index c790244..ea4f114 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,9 +46,10 @@
 
   <modules>
     <module>tika-parent</module>
-    <module>tika-core</module>
     <module>tika-test-resources</module>
-    <module>tika-parsers</module>
+    <module>tika-core</module>
+    <module>tika-parser-modules</module>
+    <module>tika-parser-bundles</module>
     <module>tika-xmp</module>
     <module>tika-serialization</module>
     <module>tika-batch</module>
@@ -59,8 +60,6 @@
     <module>tika-langdetect</module>
     <module>tika-example</module>
     <module>tika-java7</module>
-    <module>tika-parser-modules</module>
-    <module>tika-parser-bundles</module>
   </modules>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/pom.xml
----------------------------------------------------------------------
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index e362391..9177afb 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -101,6 +101,21 @@
       <groupId>commons-io</groupId>
       <version>${commons.io.version}</version>
     </dependency>
+    <!-- test dependencies -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-test-resources</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
index 998f649..98f4343 100644
--- a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
@@ -23,7 +23,7 @@ import java.util.Map;
 import org.apache.tika.batch.DigestingAutoDetectParserFactory;
 import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.util.ClassLoaderUtil;
 import org.apache.tika.util.XMLDOMUtil;
 import org.w3c.dom.Node;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 314599e..a2b91c9 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -101,7 +101,7 @@ import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerFactory;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index 5ecc763..1bc9405 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -76,7 +76,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerDecorator;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
new file mode 100644
index 0000000..132475a
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EmptyDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.mbox.OutlookPSTParser;
+import org.apache.tika.parser.microsoft.POIFSContainerDetector;
+import org.apache.tika.parser.pkg.ZipContainerDetector;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of detectors
+ */
+public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testDetectorExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeDetector detector = (CompositeDetector)config.getDetector();
+        
+        // Should be wrapping two detectors
+        assertEquals(2, detector.getDetectors().size());
+
+        
+        // First should be DefaultDetector, second EmptyDetector, in that order
+        assertEquals(DefaultDetector.class, detector.getDetectors().get(0).getClass());
+        assertEquals(EmptyDetector.class,   detector.getDetectors().get(1).getClass());
+        
+        
+        // Get the DefaultDetector from the config
+        DefaultDetector confDetector = (DefaultDetector)detector.getDetectors().get(0);
+        
+        // Get a fresh "default" DefaultDetector
+        DefaultDetector normDetector = new DefaultDetector(config.getMimeRepository());
+        
+        
+        // The default one will offer the Zip and POIFS detectors
+        assertDetectors(normDetector, true, true);
+        
+        
+        // The one from the config won't, as we excluded those
+        assertDetectors(confDetector, false, false);
+    }
+    
+    /**
+     * TIKA-1708 - If the Zip detector is disabled, either explicitly,
+     *  or via giving a list of detectors that it isn't part of, ensure
+     *  that detection of PST files still works
+     */
+    @Test
+    public void testPSTDetectionWithoutZipDetector() throws Exception {
+        // Check the one with an exclude
+        TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
+        assertNotNull(configWX.getParser());
+        assertNotNull(configWX.getDetector());
+        CompositeDetector detectorWX = (CompositeDetector)configWX.getDetector();
+
+        // Check it has the POIFS one, but not the zip one
+        assertDetectors(detectorWX, true, false);
+        
+        
+        // Check the one with an explicit list
+        TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
+        assertNotNull(configCL.getParser());
+        assertNotNull(configCL.getDetector());
+        CompositeDetector detectorCL = (CompositeDetector)configCL.getDetector();
+        assertEquals(2, detectorCL.getDetectors().size());
+        
+        // Check it also has the POIFS one, but not the zip one
+        assertDetectors(detectorCL, true, false);
+        
+        
+        // Check that both configs have a media type registry with entries
+        assertTrue("Not enough mime types: " + configWX.getMediaTypeRegistry().getTypes().size(),
+                   configWX.getMediaTypeRegistry().getTypes().size() > 100);
+        assertTrue("Not enough mime types: " + configCL.getMediaTypeRegistry().getTypes().size(),
+                   configCL.getMediaTypeRegistry().getTypes().size() > 100);
+        
+        
+        // Now check they detect PST files correctly
+        TikaInputStream stream = TikaInputStream.cast(
+                getTestDocumentAsStream("testPST.pst"));
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
+                detectorWX.detect(stream, new Metadata())
+        );
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
+                detectorCL.detect(stream, new Metadata())
+        );
+    }
+    
+    private void assertDetectors(CompositeDetector detector, boolean shouldHavePOIFS,
+                                 boolean shouldHaveZip) {
+        boolean hasZip = false;
+        boolean hasPOIFS = false;
+        for (Detector d : detector.getDetectors()) {
+            if (d instanceof ZipContainerDetector) {
+                if (shouldHaveZip) {
+                    hasZip = true;
+                } else {
+                    fail("Shouldn't have the ZipContainerDetector from config");
+                }
+            }
+            if (d instanceof POIFSContainerDetector) {
+                if (shouldHavePOIFS) {
+                    hasPOIFS = true;
+                } else {
+                    fail("Shouldn't have the POIFSContainerDetector from config");
+                }
+            }
+        }
+        if (shouldHavePOIFS) assertTrue("Should have the POIFSContainerDetector", hasPOIFS);
+        if (shouldHaveZip)   assertTrue("Should have the ZipContainerDetector", hasZip);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
new file mode 100644
index 0000000..817beb4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.executable.ExecutableParser;
+import org.apache.tika.parser.xml.XMLParser;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of parsers
+ */
+public class TikaParserConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testMimeExcludeInclude() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        Parser parser = config.getParser();
+        
+        MediaType PDF = MediaType.application("pdf");
+        MediaType JPEG = MediaType.image("jpeg");
+        
+        
+        // Has two parsers
+        assertEquals(CompositeParser.class, parser.getClass());
+        CompositeParser cParser = (CompositeParser)parser;
+        assertEquals(2, cParser.getAllComponentParsers().size());
+        
+        // Both are decorated
+        assertTrue(cParser.getAllComponentParsers().get(0) instanceof ParserDecorator);
+        assertTrue(cParser.getAllComponentParsers().get(1) instanceof ParserDecorator);
+        ParserDecorator p0 = (ParserDecorator)cParser.getAllComponentParsers().get(0);
+        ParserDecorator p1 = (ParserDecorator)cParser.getAllComponentParsers().get(1);
+        
+        
+        // DefaultParser will be wrapped with excludes
+        assertEquals(DefaultParser.class, p0.getWrappedParser().getClass());
+        
+        assertNotContained(PDF, p0.getSupportedTypes(context));
+        assertContains(PDF, p0.getWrappedParser().getSupportedTypes(context));
+        assertNotContained(JPEG, p0.getSupportedTypes(context));
+        assertContains(JPEG, p0.getWrappedParser().getSupportedTypes(context));
+        
+        
+        // Will have an empty parser for PDF
+        assertEquals(EmptyParser.class, p1.getWrappedParser().getClass());
+        assertEquals(1, p1.getSupportedTypes(context).size());
+        assertContains(PDF, p1.getSupportedTypes(context));
+        assertNotContained(PDF, p1.getWrappedParser().getSupportedTypes(context));
+    }
+    
+    @Test
+    public void testParserExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeParser parser = (CompositeParser)config.getParser();
+        
+        MediaType PE_EXE = MediaType.application("x-msdownload");
+        MediaType ELF = MediaType.application("x-elf");
+        
+        
+        // Get the DefaultParser from the config
+        ParserDecorator confWrappedParser = (ParserDecorator)parser.getParsers().get(MediaType.APPLICATION_XML);
+        assertNotNull(confWrappedParser);
+        DefaultParser confParser = (DefaultParser)confWrappedParser.getWrappedParser();
+        
+        // Get a fresh "default" DefaultParser
+        DefaultParser normParser = new DefaultParser(config.getMediaTypeRegistry());
+        
+        
+        // The default one will offer the Executable Parser
+        assertContains(PE_EXE, normParser.getSupportedTypes(context));
+        assertContains(ELF, normParser.getSupportedTypes(context));
+        
+        boolean hasExec = false;
+        for (Parser p : normParser.getParsers().values()) {
+            if (p instanceof ExecutableParser) {
+                hasExec = true;
+                break;
+            }
+        }
+        assertTrue(hasExec);
+        
+        
+        // The one from the config won't
+        assertNotContained(PE_EXE, confParser.getSupportedTypes(context));
+        assertNotContained(ELF, confParser.getSupportedTypes(context));
+        
+        for (Parser p : confParser.getParsers().values()) {
+            if (p instanceof ExecutableParser)
+                fail("Shouldn't have the Executable Parser from config");
+        }
+    }
+    /**
+     * TIKA-1558 It should be possible to exclude Parsers from being picked up by
+     * DefaultParser.
+     */
+    @Test
+    public void defaultParserBlacklist() throws Exception {
+        TikaConfig config = new TikaConfig();
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeParser cp = (CompositeParser) config.getParser();
+        List<Parser> parsers = cp.getAllComponentParsers();
+
+        boolean hasXML = false;
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser) {
+                hasXML = true;
+                break;
+            }
+        }
+        assertTrue("Default config should include an XMLParser.", hasXML);
+
+        // This custom TikaConfig should exclude XMLParser and all of its subclasses.
+        config = getConfig("TIKA-1558-blacklistsub.xml");
+        cp = (CompositeParser) config.getParser();
+        parsers = cp.getAllComponentParsers();
+
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser)
+                fail("Custom config should not include an XMLParser (" + p.getClass() + ").");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
new file mode 100644
index 0000000..764bbe4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.junit.Test;
+
+/**
+ * JUnit tests for the translator handling of {@link TikaConfig}. These are kept
+ *  apart from {@link TikaConfigTest} because they need the full set of
+ *  translators to be available.
+ */
+public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
+    /** The default config must come with a DefaultTranslator. */
+    @Test
+    public void testDefaultBehaviour() throws Exception {
+        TikaConfig defaults = TikaConfig.getDefaultConfig();
+        assertNotNull(defaults.getTranslator());
+        assertEquals(DefaultTranslator.class, defaults.getTranslator().getClass());
+    }
+
+    /** TIKA-1702-translator-default.xml must yield a DefaultTranslator. */
+    @Test
+    public void testRequestsDefault() throws Exception {
+        assertTranslator(DefaultTranslator.class, getConfig("TIKA-1702-translator-default.xml"));
+    }
+
+    /** TIKA-1702-translator-empty.xml must yield an EmptyTranslator. */
+    @Test
+    public void testRequestsEmpty() throws Exception {
+        assertTranslator(EmptyTranslator.class, getConfig("TIKA-1702-translator-empty.xml"));
+    }
+
+    /**
+     * Translators have no composite support at present, so when a config
+     *  lists several of them only the first one takes effect.
+     */
+    @Test
+    public void testRequestsMultiple() throws Exception {
+        assertTranslator(EmptyTranslator.class, getConfig("TIKA-1702-translator-empty-default.xml"));
+    }
+
+    /**
+     * Checks that the parser, detector and translator of the given config are
+     *  all present, and that the translator is exactly of the expected class.
+     */
+    private static void assertTranslator(Class<?> expected, TikaConfig config) {
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+
+        assertEquals(expected, config.getTranslator().getClass());
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
new file mode 100644
index 0000000..5787408
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link ContainerAwareDetector}
+ */
+public class TestContainerAwareDetector {
+    private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+    private final MimeTypes mimeTypes = tikaConfig.getMimeRepository();
+    private final Detector detector = new DefaultDetector(mimeTypes);
+
+    /** Detects {@code file} with no resource name supplied and expects {@code type}. */
+    private void assertTypeByData(String file, String type) throws Exception {
+        assertTypeByNameAndData(file, null, type);
+    }
+    /** Detects {@code file} with its own name as resource name and expects {@code type}. */
+    private void assertTypeByNameAndData(String file, String type) throws Exception {
+        assertTypeByNameAndData(file, file, type);
+    }
+    /** Runs both the data-only and the name-and-data check for {@code file}. */
+    private void assertType(String file, String byData, String byNameAndData) throws Exception {
+        assertTypeByData(file, byData);
+        assertTypeByNameAndData(file, byNameAndData);
+    }
+    /** Same as the four-argument variant, with the mime magic comparison skipped. */
+    private void assertTypeByNameAndData(String dataFile, String name, String type) throws Exception {
+        assertTypeByNameAndData(dataFile, name, type, null);
+    }
+    /**
+     * Checks detection of /test-documents/{@code dataFile}. A null {@code name} means no
+     * resource name is supplied; a null {@code typeFromMagic} skips the mime magic check.
+     */
+    private void assertTypeByNameAndData(String dataFile, String name, String typeFromDetector, String typeFromMagic) throws Exception {
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile))) {
+            Metadata m = new Metadata();
+            if (name != null)
+                m.add(Metadata.RESOURCE_NAME_KEY, name);
+
+            // Mime Magic version is likely to be less precise
+            if (typeFromMagic != null) {
+                assertEquals(MediaType.parse(typeFromMagic), mimeTypes.detect(stream, m));
+            }
+
+            // All being well, the detector should get it perfect
+            assertEquals(MediaType.parse(typeFromDetector), detector.detect(stream, m));
+        }
+    }
+
+    @Test
+    public void testDetectOLE2() throws Exception {
+        // Microsoft office types known by POI
+        assertTypeByData("testEXCEL.xls", "application/vnd.ms-excel");
+        assertTypeByData("testWORD.doc", "application/msword");
+        assertTypeByData("testPPT.ppt", "application/vnd.ms-powerpoint");
+
+        assertTypeByData("test-outlook.msg", "application/vnd.ms-outlook");
+        assertTypeByData("test-outlook2003.msg", "application/vnd.ms-outlook");
+        assertTypeByData("testVISIO.vsd", "application/vnd.visio");
+        assertTypeByData("testPUBLISHER.pub", "application/x-mspublisher");
+        assertTypeByData("testWORKS.wps", "application/vnd.ms-works");
+        assertTypeByData("testWORKS2000.wps", "application/vnd.ms-works");
+
+        // Older Works Word Processor formats: these files were created with
+        // Works Word Processor 7.0 (hence the text inside) and exported to the
+        // older formats with the "Save As" feature.
+        // NOTE(review): this comment used to say such files "can't be recognized",
+        // but the assertions below expect application/vnd.ms-works - confirm intent
+        assertTypeByData("testWORKSWordProcessor3.0.wps","application/vnd.ms-works");
+        assertTypeByData("testWORKSWordProcessor4.0.wps","application/vnd.ms-works");
+        assertTypeByData("testWORKSSpreadsheet7.0.xlr", "application/x-tika-msworks-spreadsheet");
+        assertTypeByData("testPROJECT2003.mpp", "application/vnd.ms-project");
+        assertTypeByData("testPROJECT2007.mpp", "application/vnd.ms-project");
+
+        // Excel95 can be detected but not parsed
+        assertTypeByData("testEXCEL_95.xls", "application/vnd.ms-excel");
+
+        // Try some ones that POI doesn't handle, that are still OLE2 based
+        assertTypeByData("testCOREL.shw", "application/x-corelpresentations");
+        assertTypeByData("testQUATTRO.qpw", "application/x-quattro-pro");
+        assertTypeByData("testQUATTRO.wb3", "application/x-quattro-pro");
+
+        assertTypeByData("testHWP_5.0.hwp", "application/x-hwp-v5");
+
+        // With the filename and data
+        assertTypeByNameAndData("testEXCEL.xls", "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD.doc", "application/msword");
+        assertTypeByNameAndData("testPPT.ppt", "application/vnd.ms-powerpoint");
+
+        // With the wrong filename supplied, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xls", "notWord.doc",  "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD.doc",  "notExcel.xls", "application/msword");
+        assertTypeByNameAndData("testPPT.ppt",   "notWord.doc",  "application/vnd.ms-powerpoint");
+
+        // With a filename of a totally different type, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xls", "notPDF.pdf",  "application/vnd.ms-excel");
+        assertTypeByNameAndData("testEXCEL.xls", "notPNG.png",  "application/vnd.ms-excel");
+    }
+
+    /**
+     * There is no way to distinguish "proper" StarOffice files from templates.
+     * All templates have the same extension but their actual type depends on
+     * the magic. Our current MimeTypes class doesn't allow us to use the same
+     * glob pattern in more than one mimetype.
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testDetectStarOfficeFiles() throws Exception {
+        assertType("testStarOffice-5.2-calc.sdc",
+                "application/vnd.stardivision.calc", "application/vnd.stardivision.calc");
+        assertType("testVORCalcTemplate.vor",
+                "application/vnd.stardivision.calc", "application/vnd.stardivision.calc");
+        assertType("testStarOffice-5.2-draw.sda",
+                "application/vnd.stardivision.draw", "application/vnd.stardivision.draw");
+        assertType("testVORDrawTemplate.vor",
+                "application/vnd.stardivision.draw", "application/vnd.stardivision.draw");
+        assertType("testStarOffice-5.2-impress.sdd",
+                "application/vnd.stardivision.impress", "application/vnd.stardivision.impress");
+        assertType("testVORImpressTemplate.vor",
+                "application/vnd.stardivision.impress", "application/vnd.stardivision.impress");
+        assertType("testStarOffice-5.2-writer.sdw",
+                "application/vnd.stardivision.writer", "application/vnd.stardivision.writer");
+        assertType("testVORWriterTemplate.vor",
+                "application/vnd.stardivision.writer", "application/vnd.stardivision.writer");
+
+    }
+
+    @Test
+    public void testOpenContainer() throws Exception {
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/testPPT.ppt"))) {
+            assertNull(stream.getOpenContainer());
+            assertEquals(MediaType.parse("application/vnd.ms-powerpoint"), detector.detect(stream, new Metadata()));
+            assertTrue(stream.getOpenContainer() instanceof NPOIFSFileSystem);
+        }
+    }
+
+    /**
+     * EPub uses a similar mimetype entry to OpenDocument for storing
+     *  the mimetype within the parent zip file
+     */
+    @Test
+    public void testDetectEPub() throws Exception {
+        assertTypeByData("testEPUB.epub", "application/epub+zip");
+        assertTypeByData("testiBooks.ibooks", "application/x-ibooks+zip");
+    }
+
+    @Test
+    public void testDetectLotusNotesEml() throws Exception {
+        // Lotus .eml files aren't guaranteed to have any of the magic
+        // matches as the first line, but should have X-Notes-Item and Message-ID
+        assertTypeByData("testLotusEml.eml", "message/rfc822");
+    }
+
+    @Test
+    public void testDetectODF() throws Exception {
+        assertTypeByData("testODFwithOOo3.odt", "application/vnd.oasis.opendocument.text");
+        assertTypeByData("testOpenOffice2.odf", "application/vnd.oasis.opendocument.formula");
+    }
+
+    @Test
+    public void testDetectOOXML() throws Exception {
+        assertTypeByData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+        // Check some of the less common OOXML types
+        assertTypeByData("testPPT.pptm", "application/vnd.ms-powerpoint.presentation.macroenabled.12");
+        assertTypeByData("testPPT.ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow");
+        assertTypeByData("testPPT.ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12");
+        assertTypeByData("testDOTM.dotm", "application/vnd.ms-word.template.macroEnabled.12");
+        assertTypeByData("testEXCEL.strict.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByData("testPPT.xps", "application/vnd.ms-xpsdocument");
+
+        assertTypeByData("testVISIO.vsdm", "application/vnd.ms-visio.drawing.macroenabled.12");
+        assertTypeByData("testVISIO.vsdx", "application/vnd.ms-visio.drawing");
+        assertTypeByData("testVISIO.vssm", "application/vnd.ms-visio.stencil.macroenabled.12");
+        assertTypeByData("testVISIO.vssx", "application/vnd.ms-visio.stencil");
+        assertTypeByData("testVISIO.vstm", "application/vnd.ms-visio.template.macroenabled.12");
+        assertTypeByData("testVISIO.vstx", "application/vnd.ms-visio.template");
+
+        // .xlsb is an OOXML file containing the binary parts, and not
+        //  an OLE2 file as you might initially expect!
+        assertTypeByData("testEXCEL.xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12");
+
+        // With the filename and data
+        assertTypeByNameAndData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByNameAndData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByNameAndData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+        // With the wrong filename supplied, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xlsx", "notWord.docx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByNameAndData("testWORD.docx",  "notExcel.xlsx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByNameAndData("testPPT.pptx",   "notWord.docx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+        // With an incorrect filename of a different container type, data trumps filename
+        assertTypeByNameAndData("testEXCEL.xlsx", "notOldExcel.xls", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    }
+
+    /**
+     * Password Protected OLE2 files are fairly straightforward to detect, as they
+     *  have the same structure as regular OLE2 files. (Core streams may be encrypted
+     *  however)
+     */
+    @Test
+    public void testDetectProtectedOLE2() throws Exception {
+        assertTypeByData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+        assertTypeByData("testWORD_protected_passtika.doc", "application/msword");
+        assertTypeByData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+        assertTypeByNameAndData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD_protected_passtika.doc", "application/msword");
+        assertTypeByNameAndData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+    }
+
+    /**
+     * Password Protected OOXML files are much more tricky beasts to work with.
+     * They have a very different structure to regular OOXML files, and instead
+     *  of being ZIP based they are actually an OLE2 file which contains the
+     *  OOXML structure within an encrypted stream.
+     * This makes detecting them much harder...
+     */
+    @Test
+    public void testDetectProtectedOOXML() throws Exception {
+        // Encrypted Microsoft Office OOXML files have OLE magic but
+        //  special streams, so we can tell they're Protected OOXML
+        assertTypeByData("testEXCEL_protected_passtika.xlsx", "application/x-tika-ooxml-protected");
+        assertTypeByData("testWORD_protected_passtika.docx", "application/x-tika-ooxml-protected");
+        assertTypeByData("testPPT_protected_passtika.pptx", "application/x-tika-ooxml-protected");
+
+        // At the moment, we can't use the name to specialise
+        // See discussions on TIKA-790 for details
+        assertTypeByNameAndData("testEXCEL_protected_passtika.xlsx", "application/x-tika-ooxml-protected");
+        assertTypeByNameAndData("testWORD_protected_passtika.docx", "application/x-tika-ooxml-protected");
+        assertTypeByNameAndData("testPPT_protected_passtika.pptx", "application/x-tika-ooxml-protected");
+    }
+
+    /**
+     * Check that temporary files created by Tika are removed after
+     * closing TikaInputStream.
+     */
+    @Test
+    public void testRemovalTempfiles() throws Exception {
+        assertRemovalTempfiles("testWORD.docx");
+        assertRemovalTempfiles("test-documents.zip");
+    }
+
+    /**
+     * Counts the entries of java.io.tmpdir whose name starts with "apache-tika-".
+     *
+     * @return number of matching entries
+     * @throws IllegalStateException if the temp directory cannot be listed
+     */
+    private int countTemporaryFiles() {
+        File tmpDir = new File(System.getProperty("java.io.tmpdir"));
+        File[] tikaFiles = tmpDir.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(File dir, String name) {
+                return name.startsWith("apache-tika-");
+            }
+        });
+        // listFiles() returns null, not an empty array, if the directory can't be read
+        if (tikaFiles == null) {
+            throw new IllegalStateException("Cannot list temp directory: " + tmpDir);
+        }
+        return tikaFiles.length;
+    }
+
+    private void assertRemovalTempfiles(String fileName) throws Exception {
+        int numberOfTempFiles = countTemporaryFiles();
+
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/" + fileName))) {
+            detector.detect(stream, new Metadata());
+        }
+
+        assertEquals(numberOfTempFiles, countTemporaryFiles());
+    }
+
+    @Test
+    public void testDetectIWork() throws Exception {
+        assertTypeByData("testKeynote.key", "application/vnd.apple.keynote");
+        assertTypeByData("testNumbers.numbers", "application/vnd.apple.numbers");
+        assertTypeByData("testPages.pages", "application/vnd.apple.pages");
+    }
+
+    @Test
+    public void testDetectKMZ() throws Exception {
+        assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
+    }
+
+    @Test
+    public void testDetectIPA() throws Exception {
+        assertTypeByNameAndData("testIPA.ipa", "application/x-itunes-ipa");
+        assertTypeByData("testIPA.ipa", "application/x-itunes-ipa");
+    }
+
+    @Test
+    public void testASiC() throws Exception {
+        assertTypeByData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+        assertTypeByData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+        assertTypeByNameAndData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+        assertTypeByNameAndData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+    }
+
+    @Test
+    public void testDetectZip() throws Exception {
+        assertTypeByData("test-documents.zip", "application/zip");
+        assertTypeByData("test-zip-of-zip.zip", "application/zip");
+
+        // JAR based formats
+        assertTypeByData("testJAR.jar", "application/java-archive");
+        assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
+        assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
+        assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
+
+        // JAR with HTML files in it
+        assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
+                                "application/java-archive", "application/java-archive");
+    }
+
+    /**
+     * Reads the first {@code n} bytes of /test-documents/{@code name} into memory.
+     *
+     * @return a TikaInputStream over those bytes
+     * @throws IOException if the document is missing or has fewer than {@code n} bytes
+     */
+    private TikaInputStream getTruncatedFile(String name, int n)
+            throws IOException {
+        try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(
+                "/test-documents/" + name)) {
+            // getResourceAsStream reports a missing resource with null, not an exception
+            if (input == null) {
+                throw new IOException("Test document not found: " + name);
+            }
+            byte[] bytes = new byte[n];
+            int filled = 0;
+            while (filled < bytes.length) {
+                int count = input.read(bytes, filled, bytes.length - filled);
+                if (count == -1) {
+                    throw new IOException("Unexpected end of stream");
+                }
+                filled += count;
+            }
+            return TikaInputStream.get(bytes);
+        }
+    }
+
+    @Test
+    public void testTruncatedFiles() throws Exception {
+        // First up a truncated OOXML (zip) file
+        // With only the data supplied, the best we can do is the container
+        Metadata m = new Metadata();
+        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
+            assertEquals(MediaType.application("x-tika-ooxml"), detector.detect(xlsx, m));
+        }
+
+        // With truncated data + filename, we can use the filename to specialise
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
+        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
+            assertEquals(MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"), detector.detect(xlsx, m));
+        }
+
+        // Now a truncated OLE2 file
+        m = new Metadata();
+        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
+            assertEquals(MediaType.application("x-tika-msoffice"), detector.detect(xls, m));
+        }
+
+        // Finally a truncated OLE2 file, with a filename available
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
+        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
+            assertEquals(MediaType.application("vnd.ms-excel"), detector.detect(xls, m));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
new file mode 100644
index 0000000..45f68cc
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.embedder;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.txt.TXTParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Unit test for {@link ExternalEmbedder}s.
+ */
+public class ExternalEmbedderTest extends TikaTest {
+
+    static Path TMP_TEST_TXT;
+    protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
+            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
+    protected static final String DEFAULT_CHARSET = UTF_8.name();
+    private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION = "dc:description";
+    private static final String TEST_TXT_PATH = "test-documents/testTXT.txt";
+
+    private TemporaryResources tmp = new TemporaryResources();
+
+    /** Copies the test document from the classpath into a fresh temp file. */
+    @BeforeClass
+    public static void copyTestFile() throws Exception {
+        TMP_TEST_TXT = Files.createTempFile("tika-test", "");
+        try (InputStream source = TikaTest.class.getClassLoader().getResourceAsStream(TEST_TXT_PATH)) {
+            Files.copy(source, TMP_TEST_TXT, StandardCopyOption.REPLACE_EXISTING);
+        }
+    }
+
+    @AfterClass
+    public static void rmTestFile() throws Exception {
+        Files.delete(TMP_TEST_TXT);
+    }
+
+    /**
+     * Gets the expected returned metadata value for the given field
+     *
+     * @param fieldName name of the metadata field
+     * @param timestamp time that gets formatted into the value
+     * @return this class' simple name, " embedded ", the field name, " on " and the formatted timestamp
+     */
+    protected String getExpectedMetadataValueString(String fieldName, Date timestamp) {
+        return this.getClass().getSimpleName() + " embedded " + fieldName +
+                " on " + EXPECTED_METADATA_DATE_FORMATTER.format(timestamp);
+    }
+
+    /**
+     * Gets the tika <code>Metadata</code> object containing data to be embedded.
+     *
+     * @param timestamp time passed on to {@link #getExpectedMetadataValueString}
+     * @return the populated tika metadata object
+     */
+    protected Metadata getMetadataToEmbed(Date timestamp) {
+        Metadata metadata = new Metadata();
+        metadata.add(TikaCoreProperties.DESCRIPTION,
+                getExpectedMetadataValueString(TikaCoreProperties.DESCRIPTION.toString(), timestamp));
+        return metadata;
+    }
+
+    /**
+     * Gets the <code>Embedder</code> to test.
+     *
+     * @return the embedder under test
+     */
+    protected Embedder getEmbedder() {
+        ExternalEmbedder embedder = new ExternalEmbedder();
+        Map<Property, String[]> metadataCommandArguments = new HashMap<Property, String[]>(1);
+        metadataCommandArguments.put(TikaCoreProperties.DESCRIPTION,
+                new String[] { COMMAND_METADATA_ARGUMENT_DESCRIPTION });
+        embedder.setMetadataCommandArguments(metadataCommandArguments);
+        return embedder;
+    }
+
+    /**
+     * Gets the source input stream through standard Java resource loaders
+     * before metadata has been embedded.
+     *
+     * @return a fresh input stream
+     */
+    protected InputStream getSourceStandardInputStream() {
+        // TEST_TXT_PATH has no leading slash, so Class#getResourceAsStream would resolve it
+        // against this class' package; use the class loader, as copyTestFile() does
+        return this.getClass().getClassLoader().getResourceAsStream(TEST_TXT_PATH);
+    }
+
+    /**
+     * Gets the source input stream via {@link TikaInputStream}
+     * before metadata has been embedded.
+     *
+     * @return a fresh input stream
+     * @throws IOException if TikaInputStream cannot open the copied test file
+     */
+    protected InputStream getSourceTikaInputStream() throws IOException {
+        return TikaInputStream.get(TMP_TEST_TXT);
+    }
+
+    /**
+     * Gets the parser to use to verify the result of the embed operation.
+     *
+     * @return the parser to read embedded metadata
+     */
+    protected Parser getParser() {
+        return new TXTParser();
+    }
+
+    /**
+     * Whether or not the final result of reading the now embedded metadata is
+     * expected in the output of the external tool
+     *
+     * @return whether or not results are expected in command line output
+     */
+    protected boolean getIsMetadataExpectedInOutput() {
+        return true;
+    }
+
+    /**
+     * Tests embedding metadata then reading metadata to verify the results.
+     *
+     * @param sourceInputStream document to embed into; this method does not close it
+     * @param isResultExpectedInOutput whether or not results are expected in command line output
+     */
+    protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
+        Embedder embedder = getEmbedder();
+
+        // TODO Move this check to ExternalEmbedder
+        String os = System.getProperty("os.name", "");
+        if (os.contains("Windows")) {
+            // Skip test on Windows
+            return;
+        }
+
+        Date timestamp = new Date();
+        Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
+
+        try {
+            // Setup the extracting content handler
+            ByteArrayOutputStream result = new ByteArrayOutputStream();
+            OutputStreamWriter outputWriter = new OutputStreamWriter(result,DEFAULT_CHARSET);
+            ContentHandler handler = new BodyContentHandler(outputWriter);
+
+            // Create a new metadata object to read the new metadata into
+            Metadata embeddedMetadata = new Metadata();
+
+            try {
+                File tempOutputFile = tmp.createTemporaryFile();
+
+                // Embed the metadata into a copy of the original output stream
+                try (FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile)) {
+                    embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
+                }
+
+                ParseContext context = new ParseContext();
+                Parser parser = getParser();
+                context.set(Parser.class, parser);
+
+                // Setup a re-read of the now embedded temp file
+                try (FileInputStream embeddedFileInputStream = new FileInputStream(tempOutputFile)) {
+                    parser.parse(embeddedFileInputStream, handler, embeddedMetadata, context);
+                }
+            } finally {
+                // Dispose of the temp file even when embedding or parsing fails
+                tmp.dispose();
+            }
+
+            String outputString = null;
+            if (isResultExpectedInOutput) {
+                outputString = result.toString(DEFAULT_CHARSET);
+            } else {
+                assertTrue("no metadata found", embeddedMetadata.size() > 0);
+            }
+
+            // Check each metadata property for the expected value
+            for (String metadataName : metadataToEmbed.names()) {
+                if (metadataToEmbed.get(metadataName) != null) {
+                    String expectedValue = metadataToEmbed.get(metadataName);
+                    boolean foundExpectedValue = false;
+                    if (isResultExpectedInOutput) {
+                        // just check that the entire output contains the expected string
+                        foundExpectedValue = outputString.contains(expectedValue);
+                    } else {
+                        if (embeddedMetadata.isMultiValued(metadataName)) {
+                            for (String embeddedValue : embeddedMetadata.getValues(metadataName)) {
+                                if (embeddedValue != null && embeddedValue.contains(expectedValue)) {
+                                    foundExpectedValue = true;
+                                    break;
+                                }
+                            }
+                        } else {
+                            String embeddedValue = embeddedMetadata.get(metadataName);
+                            assertNotNull("expected metadata for " + metadataName + " not found", embeddedValue);
+                            foundExpectedValue = embeddedValue.contains(expectedValue);
+                        }
+                    }
+                    assertTrue("result did not contain expected appended metadata "
+                            + metadataName + "=" + expectedValue,
+                            foundExpectedValue);
+                }
+            }
+        } catch (IOException | TikaException | SAXException e) {
+            fail(e.getMessage());
+        }
+    }
+
+    protected void checkSourceFileExists() {
+        String message = "the original input file was deleted";
+        assertNotNull(message, TMP_TEST_TXT);
+        assertTrue(message, Files.isRegularFile(TMP_TEST_TXT));
+    }
+
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
+     * @throws IOException if closing the source stream fails
+     */
+    @Test
+    public void testEmbedStandardInputStream() throws IOException {
+        try (InputStream source = getSourceStandardInputStream()) {
+            embedInTempFile(source, getIsMetadataExpectedInOutput());
+        }
+        checkSourceFileExists();
+    }
+
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
+     * @throws IOException if the source stream cannot be opened or closed
+     */
+    @Test
+    public void testEmbedTikaInputStream() throws IOException {
+        try (InputStream source = getSourceTikaInputStream()) {
+            embedInTempFile(source, getIsMetadataExpectedInOutput());
+        }
+        checkSourceFileExists();
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
new file mode 100644
index 0000000..447042b
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypeTest {
+
+    private MimeTypes types;
+    private MimeType text;
+
+    @Before
+    public void setUp() throws MimeTypeException {
+        types = new MimeTypes();
+        text = types.forName("text/plain");
+    }
+
+    /** Test MimeType constructor */
+    @Test
+    public void testConstrctor() {
+        // Missing name
+        try {
+            new MimeType(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+    @Test
+    public void testIsValidName() {
+        assertTrue(MimeType.isValid("application/octet-stream"));
+        assertTrue(MimeType.isValid("text/plain"));
+        assertTrue(MimeType.isValid("foo/bar"));
+        assertTrue(MimeType.isValid("a/b"));
+
+        assertFalse(MimeType.isValid("application"));
+        assertFalse(MimeType.isValid("application/"));
+        assertFalse(MimeType.isValid("/"));
+        assertFalse(MimeType.isValid("/octet-stream"));
+        assertFalse(MimeType.isValid("application//octet-stream"));
+        assertFalse(MimeType.isValid("application/octet=stream"));
+        assertFalse(MimeType.isValid("application/\u00f6ctet-stream"));
+        assertFalse(MimeType.isValid("text/plain;"));
+        assertFalse(MimeType.isValid("text/plain; charset=UTF-8"));
+        try {
+            MimeType.isValid(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+    /** Test that MimeType setDescription(), setAcronym(), addLink() and setUniformTypeIdentifier() reject null */
+    @Test
+    public void testSetEmptyValues() {
+        try {
+            text.setDescription(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+        
+        try {
+            text.setAcronym(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+        
+        try {
+            text.addLink(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+
+        try {
+            text.setUniformTypeIdentifier(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
new file mode 100644
index 0000000..be8a575
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.apache.tika.mime.MediaType.OCTET_STREAM;
+import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypesTest {
+
+    private MimeTypes types;
+
+    private MediaTypeRegistry registry;
+
+    private MimeType binary;
+
+    private MimeType text;
+
+    private MimeType html;
+
+    @Before
+    public void setUp() throws MimeTypeException {
+        types = new MimeTypes();
+        registry = types.getMediaTypeRegistry();
+        binary = types.forName("application/octet-stream");
+        text = types.forName("text/plain");
+        types.addAlias(text, MediaType.parse("text/x-plain"));
+        html = types.forName("text/html");
+        types.setSuperType(html, TEXT_PLAIN);
+    }
+
+    @Test
+    public void testForName() throws MimeTypeException {
+        assertEquals(text, types.forName("text/plain"));
+        assertEquals(text, types.forName("TEXT/PLAIN"));
+
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException e) {
+            // expected
+        }
+    }
+
+    @Test
+    public void testRegisteredMimes() throws MimeTypeException {
+        String dummy = "text/xxxxx";
+        assertEquals(text, types.getRegisteredMimeType("text/plain"));
+        assertNull(types.getRegisteredMimeType(dummy));
+        assertNotNull(types.forName(dummy));
+        assertEquals(dummy, types.forName("text/xxxxx").getType().toString());
+        assertEquals(dummy, types.getRegisteredMimeType("text/xxxxx").getType().toString());
+        
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException e) {
+            // expected
+        }
+    }
+
+    @Test
+    public void testSuperType() throws MimeTypeException {
+        assertNull(registry.getSupertype(OCTET_STREAM)); // octet-stream is expected to have no supertype
+        assertEquals(OCTET_STREAM, registry.getSupertype(TEXT_PLAIN));
+        assertEquals(TEXT_PLAIN, registry.getSupertype(html.getType())); // supertype assigned in setUp()
+    }
+
+    @Test
+    public void testIsDescendantOf() {
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(html.getType(), html.getType()));
+
+        assertTrue(registry.isSpecializationOf(html.getType(), OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, html.getType()));
+
+        assertTrue(registry.isSpecializationOf(html.getType(), TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, html.getType()));
+
+        assertTrue(registry.isSpecializationOf(TEXT_PLAIN, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, TEXT_PLAIN));
+    }
+
+    @Test
+    public void testCompareTo() {
+        assertEquals(0, binary.compareTo(binary)); // assertEquals reports the actual value on failure
+        assertTrue(binary.compareTo(text) != 0);
+        assertTrue(binary.compareTo(html) != 0);
+        // text/plain: must compare as 0 with itself only
+        assertTrue(text.compareTo(binary) != 0);
+        assertEquals(0, text.compareTo(text));
+        assertTrue(text.compareTo(html) != 0);
+        // text/html: must compare as 0 with itself only
+        assertTrue(html.compareTo(binary) != 0);
+        assertTrue(html.compareTo(text) != 0);
+        assertEquals(0, html.compareTo(html));
+    }
+
+}


[02/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/brwNIMS_2014.dif
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/brwNIMS_2014.dif b/tika-test-resources/src/test/resources/test-documents/brwNIMS_2014.dif
new file mode 100644
index 0000000..e131add
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/brwNIMS_2014.dif
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+          <Entry_ID>02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c</Entry_ID>
+          <Entry_Title>Barrow Logger Data NIMS 2014</Entry_Title>
+
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>BIOSPHERE</Topic>
+            <Term>ECOLOGICAL DYNAMICS</Term>
+          </Parameters>
+
+
+          <Spatial_Coverage>
+            <Southernmost_Latitude>70</Southernmost_Latitude>
+            <Northernmost_Latitude>72</Northernmost_Latitude>
+            <Westernmost_Longitude>-162</Westernmost_Longitude>
+            <Easternmost_Longitude>-150</Easternmost_Longitude>
+          </Spatial_Coverage>
+
+          <Data_Center>
+            <Data_Center_Name>
+              <Short_Name>ACADIS</Short_Name>
+              <Long_Name>Advanced Cooperative Arctic Data and Information Service</Long_Name>
+            </Data_Center_Name>
+            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+            <Personnel>
+              <Role>DATA CENTER CONTACT</Role>
+              <First_Name>ACADIS</First_Name>
+              <Last_Name>User Services</Last_Name>
+              <Contact_Address>
+                <Address>NCAR/CISL</Address>
+                <Address>P.O. Box 3000</Address>
+                <City>Boulder</City>
+                <Province_or_State>CO</Province_or_State>
+                <Postal_Code>80307</Postal_Code>
+                <Country>USA</Country>
+              </Contact_Address>
+            </Personnel>
+          </Data_Center>
+
+          <Summary>
+            <Abstract>Logger records from the Networked Info-mechanical Systems (NIMS), Transect length: ~50m The data was recorded using a CR3000 logger. The sensor trolley was equipped with instruments for recording the distance to vegetation canopy (SR50a Sonic Distance, Campbell Scientific), up- and downwelling short- and longwave radiation (CNR4 net radiometer, Kipp &amp; Zonen), air temperature and surface temperature (SI-111 IR radiometer, Apogee Instruments Inc.) and spectral reflection (Jaz Combo-2, Ocean Optics; GreenSeeker RT100 (505), NTech).</Abstract>
+          </Summary>
+
+          <Related_URL>
+            <URL_Content_Type>
+              <Type>GET DATA</Type>
+            </URL_Content_Type>
+            <URL>http://www.aoncadis.org/dataset/id/02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c.html</URL>
+            <Description>Data Center top-level access page for this resource</Description>
+          </Related_URL>
+
+          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+          <Metadata_Version>9.8.4</Metadata_Version>
+          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+        </DIF>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/circles-with-prefix.svg
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/circles-with-prefix.svg b/tika-test-resources/src/test/resources/test-documents/circles-with-prefix.svg
new file mode 100644
index 0000000..d68ff55
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/circles-with-prefix.svg
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+  <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+  </svg:g>
+</svg:svg>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/circles.svg
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/circles.svg b/tika-test-resources/src/test/resources/test-documents/circles.svg
new file mode 100644
index 0000000..8b71e82
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/circles.svg
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+  <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+    <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+    <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+    <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+  </g>
+</svg>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/datamatrix.png
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/datamatrix.png b/tika-test-resources/src/test/resources/test-documents/datamatrix.png
new file mode 100644
index 0000000..4aa5003
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/datamatrix.png differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/english.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/english.txt b/tika-test-resources/src/test/resources/test-documents/english.txt
new file mode 100644
index 0000000..5e3d20e
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/english.txt
@@ -0,0 +1 @@
+This is English!

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/foo.csv
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/foo.csv b/tika-test-resources/src/test/resources/test-documents/foo.csv
new file mode 100644
index 0000000..0f48f3e
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/foo.csv
@@ -0,0 +1,4 @@
+foo,bar,baz
+123,"abc def",-987
+456,"qwertyuiop",98765
+789,"qawsedrft",3.14159

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/french.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/french.txt b/tika-test-resources/src/test/resources/test-documents/french.txt
new file mode 100644
index 0000000..678e6c2
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/french.txt
@@ -0,0 +1 @@
+c’est comme ci comme ça

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/htmlfragment
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/htmlfragment b/tika-test-resources/src/test/resources/test-documents/htmlfragment
new file mode 100644
index 0000000..bf36d08
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/htmlfragment
@@ -0,0 +1,18 @@
+<div id="leftcol">
+	  <ul>
+        <li><a href="/mission/sec/sec.html"> Security and Information Sciences Home&nbsp;&rsaquo;</a>        </li>
+        <li><a href="/mission/sec/publications/-publications.html">Publications&nbsp;&rsaquo;</a> </li>
+        <li><a href="/mission/sec/corpora/corpora.html">Corpora&nbsp;&rsaquo;</a> </li>
+        <li><a href="/mission/sec/softwaretools/tools.html">Software Tools&nbsp;&rsaquo;</a></li>
+        <li><a href="/mission/sec/CSO/CSO.html"> Systems and Operations&nbsp;&rsaquo;</a>
+          <ul>
+            <li><a href="/mission/sec/publications/-publications.html">Publications &rsaquo;</a></li>
+            <li><a href="/mission/sec/CSO/biographies/CSObios.html">Biographies&nbsp;&rsaquo;</a></li>
+          </ul>
+        </li>
+        <li><a href="/mission/sec/CST/CST.html"> Systems and Technology&nbsp;&rsaquo;</a> </li>
+        <li><a href="/mission/sec/CSA/CSA.html"> System Assessments&nbsp;&rsaquo;</a> </li>
+	    <li><a href="/mission/sec/HLT/HLT.html">Human Language Technology&nbsp;&rsaquo;</a>
+<li><a href="/mission/sec/computing/computing.html">Computing and Analytics&nbsp;&rsaquo;</a></li>
+  </ul>
+</div>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
index 4561c3a..f4f857a 100644
--- a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
+++ b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
@@ -21,5 +21,5 @@
 <mock>
     <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
     <write element="p">some content</write>
-    <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-</mock>
\ No newline at end of file
+    <throw class="java.lang.NullPointerException">null pointer message</throw>
+</mock>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/password.xls
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/password.xls b/tika-test-resources/src/test/resources/test-documents/password.xls
new file mode 100644
index 0000000..a6ad86a
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/password.xls differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/pic.xls
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/pic.xls b/tika-test-resources/src/test/resources/test-documents/pic.xls
new file mode 100644
index 0000000..6798ae2
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/pic.xls differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/pic.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/pic.xlsx b/tika-test-resources/src/test/resources/test-documents/pic.xlsx
new file mode 100644
index 0000000..9cc155a
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/pic.xlsx differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/plotutils-bin-cgm-v3.cgm
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/plotutils-bin-cgm-v3.cgm b/tika-test-resources/src/test/resources/test-documents/plotutils-bin-cgm-v3.cgm
new file mode 100644
index 0000000..450f5ad
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/plotutils-bin-cgm-v3.cgm differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/stylesheet.xsl
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/stylesheet.xsl b/tika-test-resources/src/test/resources/test-documents/stylesheet.xsl
new file mode 100644
index 0000000..d704f07
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/stylesheet.xsl
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="utf-8"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:template match="/">
+    <test hello="world"/>
+  </xsl:template>
+</xsl:stylesheet>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf1.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf1.xml b/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf1.xml
new file mode 100644
index 0000000..dc88dcf
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf1.xml
@@ -0,0 +1,39 @@
+<?xml version='1.0' encoding='ISO-8859-1'?>
+
+<!DOCTYPE uridef[
+  <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns">
+  <!ENTITY shadow-rdf "http://www.daml.org/services/owl-s/1.2/generic/ObjectList.owl">
+  <!ENTITY expr "http://www.daml.org/services/owl-s/1.2/generic/Expression.owl">
+  <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema">
+  <!ENTITY owl "http://www.w3.org/2002/07/owl">
+  <!ENTITY xsd "http://www.w3.org/2001/XMLSchema">
+  <!ENTITY time "http://www.isi.edu/~hobbs/damltime/time-entry.owl">
+  <!ENTITY swrl "http://www.w3.org/2003/11/swrl">
+  <!ENTITY service "http://www.daml.org/services/owl-s/1.2/Service.owl">
+  <!ENTITY grounding "http://www.daml.org/services/owl-s/1.2/Grounding.owl">
+  <!ENTITY process "http://www.daml.org/services/owl-s/1.2/Process.owl">
+  <!ENTITY DEFAULT "http://www.daml.org/services/owl-s/1.2/Process.owl">
+]>
+
+
+<rdf:RDF
+  xmlns:rdf=    "&rdf;#"
+  xmlns:shadow-rdf= "&shadow-rdf;#"
+  xmlns:expr= "&expr;#"
+  xmlns:rdfs=   "&rdfs;#"
+  xmlns:owl= "&owl;#"
+  xmlns:swrl= "&swrl;#"
+  xmlns:xsd= "&xsd;#"
+  xmlns:service= "&service;#"
+  xmlns:process= "&process;#"
+  xmlns:grounding= "&grounding;#"
+  xmlns=        "&DEFAULT;#"
+  xml:base="&process;">
+
+<!--
+  TIKA-309: Mime type application/rdf+xml not correctly detected
+  Simplified test case based on the OWL document at
+  http://www.ai.sri.com/daml/services/owl-s/1.2/Process.owl
+-->
+
+</rdf:RDF>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf2.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf2.xml b/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf2.xml
new file mode 100644
index 0000000..0f8fe28
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-difficult-rdf2.xml
@@ -0,0 +1,44 @@
+<!-- This is the OWL 2 Namespace Document, sometimes
+     called the "owl.owl" file.
+
+     For some commentary about its creation, see
+     http://www.w3.org/2007/OWL/wiki/Owl2DotOwlDevel
+
+     This was created from the 16 Oct 2009 version of
+     that page, with the turtle-to-rdf/xml conversion
+     done by cwm, and the conversion to XML entity
+     references done by hand. The GRDDL triple and
+     namespace have also been added by hand
+
+     The real OWL 1 and OWL 2 namespace is:
+          http://www.w3.org/2002/07/owl#
+
+-->
+<!DOCTYPE rdf:RDF [
+
+<!ENTITY location "http://www.w3.org/2002/07/owl" >
+<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
+<!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
+<!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
+<!ENTITY dc "http://purl.org/dc/elements/1.1/" >
+<!ENTITY grddl "http://www.w3.org/2003/g/data-view#" >
+<!ENTITY owl "&location;#" >
+
+]>
+<rdf:RDF
+    xml:base ="&location;"
+    xmlns:rdf ="&rdf;"
+    xmlns:rdfs="&rdfs;"
+    xmlns:xsd = "&xsd;"
+    xmlns:owl ="&owl;"
+    xmlns:dc = "&dc;"
+    xmlns:grddl = "&grddl;"
+    >
+
+<!--
+  TIKA-309: Mime type application/rdf+xml not correctly detected
+  Simplified test case based on the OWL 2 Namespace Document at
+  http://www.w3.org/2002/07/owl#
+-->
+
+</rdf:RDF>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-iso-8859-1.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-iso-8859-1.xml b/tika-test-resources/src/test/resources/test-documents/test-iso-8859-1.xml
new file mode 100644
index 0000000..7573369
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-iso-8859-1.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-long-comment.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-long-comment.xml b/tika-test-resources/src/test/resources/test-documents/test-long-comment.xml
new file mode 100644
index 0000000..84844ec
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-long-comment.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<?somepi blahblah test="ignore-me.xml" ?>
+<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-tika-327.html
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-tika-327.html b/tika-test-resources/src/test/resources/test-documents/test-tika-327.html
new file mode 100644
index 0000000..792a18b
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-tika-327.html
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="iso-8859-1"?><link href="http://www.apache.org" rel="stylesheet" type="text/css" />
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+<title>title</title>
+<meta name="description" content="content" />
+<meta name="keywords" content="keys" />
+<script language="JavaScript" type="text/javascript">
+<!--
+function hello() {
+}
+//-->
+
+
+</script>
+
+<!-- IE fix -->
+<style type="text/css">form { display: inline }</style>
+<!--
+comment
+-->
+</head>
+
+<body>
+<table>
+  <tr>
+    <td> 
+	<table>
+        <tr>
+          <td><font class="title"><!--comment--><a href="index.php">image</a></font></td>
+          <td> <table>
+              <tr>
+                <td>
+                                  </td>
+              </tr>
+         </table></td>
+
+
+
+        </tr>
+        <tr>
+          <td>
+            <span class="class">Home </span>            </span>
+          </td>
+          <td>
+            July 2, 2013           </td>
+        </tr>
+      </table></td>
+  </tr>
+</table>
+end of table
+</body>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-utf16be.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-utf16be.xml b/tika-test-resources/src/test/resources/test-documents/test-utf16be.xml
new file mode 100644
index 0000000..6835338
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/test-utf16be.xml differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-utf16le.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-utf16le.xml b/tika-test-resources/src/test/resources/test-documents/test-utf16le.xml
new file mode 100644
index 0000000..2a9124d
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/test-utf16le.xml differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-utf8-bom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-utf8-bom.xml b/tika-test-resources/src/test/resources/test-documents/test-utf8-bom.xml
new file mode 100644
index 0000000..4cd4db3
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-utf8-bom.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test-utf8.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test-utf8.xml b/tika-test-resources/src/test/resources/test-documents/test-utf8.xml
new file mode 100644
index 0000000..1304d8b
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test-utf8.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<test hello="world"/>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test.html
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test.html b/tika-test-resources/src/test/resources/test-documents/test.html
new file mode 100644
index 0000000..763e237
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/test.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>Hello World</title>
+</head>
+<body>
+  <p>Hello World!<p/>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/test.xls
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/test.xls b/tika-test-resources/src/test/resources/test-documents/test.xls
new file mode 100644
index 0000000..347d8a6
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/test.xls differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf b/tika-test-resources/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf
new file mode 100644
index 0000000..a5870e5
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf
@@ -0,0 +1,235 @@
+{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff0\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang2057\deflangfe2057\themelang2057\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
+{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\f171\fbidi \froman\fcharset0\fprq2{\*\panose 02040602050305030304}Book Antiqua;}
+{\f318\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0603030504020204}Humnst777 BT{\*\falt Lucida Sans Unicode};}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}
+{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}
+{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f319\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f320\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\f322\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f323\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f324\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f325\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\f326\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f327\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f329\fbidi \fswiss\fcharset238\fprq2 Arial CE;}{\f330\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;}
+{\f332\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f333\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f334\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f335\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);}
+{\f336\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f337\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f659\fbidi \froman\fcharset238\fprq2 Cambria Math CE;}{\f660\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;}
+{\f662\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f663\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f666\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;}{\f667\fbidi \froman\fcharset163\fprq2 Cambria Math (Vietnamese);}
+{\f2029\fbidi \froman\fcharset238\fprq2 Book Antiqua CE;}{\f2030\fbidi \froman\fcharset204\fprq2 Book Antiqua Cyr;}{\f2032\fbidi \froman\fcharset161\fprq2 Book Antiqua Greek;}{\f2033\fbidi \froman\fcharset162\fprq2 Book Antiqua Tur;}
+{\f2036\fbidi \froman\fcharset186\fprq2 Book Antiqua Baltic;}{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
+{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
+{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;}
+{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;}
+{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\fhimajor\f31536\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
+{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
+{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
+{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
+{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
+{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}
+{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}
+{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
+{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
+{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
+{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;
+\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\*\defchp \fs22 }{\*\defpap 
+\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 
+\ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\s1\ql \li0\ri0\keepn\widctlpar\tx5670\tx8222\wrapdefault\faauto\outlinelevel0\rin0\lin0\itap0 \rtlch\fcs1 \ab\af1\afs24\alang1025 \ltrch\fcs0 
+\b\f1\fs20\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext0 \slink15 \sqformat heading 1;}{\s2\ql \li0\ri0\keepn\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel1\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
+\ab\af1\afs28\alang1025 \ltrch\fcs0 \b\f1\fs28\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \sbasedon0 \snext0 \slink16 \sqformat heading 2;}{\s4\ql \li5670\ri0\keepn\widctlpar\tx5670\tx7371\wrapdefault\faauto\outlinelevel3\rin0\lin5670\itap0 
+\rtlch\fcs1 \ab\af1\afs16\alang1025 \ltrch\fcs0 \b\f1\fs16\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \sbasedon0 \snext0 \slink17 \sqformat heading 4;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\*
+\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa200\sl276\slmult1
+\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \snext11 \ssemihidden \sunhideused \sqformat Normal Table;}{\*\cs15 \additive 
+\rtlch\fcs1 \ab\af31503\afs32 \ltrch\fcs0 \b\fs32\lang0\langfe1033\kerning32\loch\f31502\hich\af31502\dbch\af31501\langnp0\langfenp1033 \sbasedon10 \slink1 \slocked \spriority9 Heading 1 Char;}{\*\cs16 \additive \rtlch\fcs1 \ab\ai\af31503\afs28 
+\ltrch\fcs0 \b\i\fs28\lang0\langfe1033\loch\f31502\hich\af31502\dbch\af31501\langnp0\langfenp1033 \sbasedon10 \slink2 \slocked \ssemihidden \spriority9 Heading 2 Char;}{\*\cs17 \additive \rtlch\fcs1 \ab\af31507\afs28 \ltrch\fcs0 
+\b\fs28\lang0\langfe1033\loch\f31506\hich\af31506\dbch\af31505\langnp0\langfenp1033 \sbasedon10 \slink4 \slocked \ssemihidden \spriority9 Heading 4 Char;}{\s18\ql \li0\ri0\widctlpar
+\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext18 \slink19 header;}{\*\cs19 \additive \rtlch\fcs1 
+\af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 \sbasedon10 \slink18 \slocked \ssemihidden Header Char;}{\s20\ql \li0\ri0\widctlpar\tx3402\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
+\ab\af1\afs24\alang1025 \ltrch\fcs0 \b\f1\fs20\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext20 \slink21 Body Text;}{\*\cs21 \additive \rtlch\fcs1 \af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 
+\sbasedon10 \slink20 \slocked \ssemihidden Body Text Char;}{\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 
+\fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext22 \slink23 \styrsid14506524 footer;}{\*\cs23 \additive \rtlch\fcs1 \af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 \sbasedon10 \slink22 \slocked \ssemihidden 
+Footer Char;}}{\*\rsidtbl \rsid69694\rsid615335\rsid817088\rsid1394934\rsid1968554\rsid2362503\rsid2504751\rsid2508965\rsid3497332\rsid3954968\rsid4262707\rsid4459777\rsid4947815\rsid5249973\rsid5375126\rsid5768946\rsid6625584\rsid6695929\rsid7547824
+\rsid7568219\rsid7681002\rsid7756842\rsid8788056\rsid9179382\rsid9185548\rsid9589441\rsid9716173\rsid10108489\rsid10158374\rsid10170376\rsid10447577\rsid10506307\rsid10508481\rsid11937854\rsid12735407\rsid14506524\rsid15223573\rsid15351889\rsid15429861
+\rsid15800823\rsid16209942\rsid16329808\rsid16338741\rsid16531520}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\title Cardiff}{\author A Other}
+{\operator Ian Williams}{\creatim\yr2016\mo2\dy1\hr16\min12}{\revtim\yr2016\mo2\dy1\hr16\min12}{\version2}{\edmins2}{\nofpages1}{\nofwords6}{\nofchars37}{\*\company Cardiff}{\nofcharsws42}{\vern32773}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/off
+ice/word/2003/wordml}}\paperw11906\paperh16838\margl851\margr851\margt567\margb794\gutter0\ltrsect 
+\widowctrl\ftnbj\aenddoc\trackmoves1\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen
+\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace120\dgvspace180\dghorigin851\dgvorigin567\dghshow2\dgvshow1
+\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nojkernpunct\rsidroot15429861 \fet0{\*\wgrffmtfilter 013f}\ilfomacatclnup0{\*\template 
+C:\\PMS\\DOCUMENT\\gplnew.dot}{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 
+{\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid5249973 \chftnsep 
+\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
+\ltrch\fcs0 \insrsid5249973 \chftnsepc 
+\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
+\ltrch\fcs0 \insrsid5249973 \chftnsep 
+\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
+\ltrch\fcs0 \insrsid5249973 \chftnsepc 
+\par }}\ltrpar \sectd \ltrsect\linex0\headery709\footery709\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sectrsid3497332\sftnbj {\headerl \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar
+\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
+\par }}{\headerr \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
+\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
+\par }}{\footerl \ltrpar \pard\plain \ltrpar\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
+\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
+\par }}{\footerr \ltrpar \pard\plain \ltrpar\s22\qr \li-284\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin-284\itap0\pararsid14506524 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 
+\fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0\afs16 \ltrch\fcs0 \f318\fs16\insrsid1968554 {\pict{\*\picprop\shplid1026{\sp{\sn shapeType}{\sv 1}}{\sp{\sn fFlipH}{\sv 0}}
+{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn fillColor}{\sv 3355443}}{\sp{\sn fRecolorFillAsPicture}{\sv 0}}{\sp{\sn fUseShapeAnchor}{\sv 0}}{\sp{\sn fFilled}{\sv 1}}{\sp{\sn fLine}{\sv 0}}{\sp{\sn pctHR}{\sv 0}}{\sp{\sn alignHR}{\sv 1}}{\sp{\sn dxHeightHR}{\sv 20}}
+{\sp{\sn dxWidthHR}{\sv 10943}}{\sp{\sn fLayoutInCell}{\sv 1}}{\sp{\sn fStandardHR}{\sv 1}}{\sp{\sn fNoshadeHR}{\sv 1}}{\sp{\sn fHorizRule}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 1}}}\picscalex1094\picscaley4\piccropl0\piccropr0\piccropt0\piccropb0
+\picw1764\pich882\picwgoal1000\pichgoal500\wmetafile8\bliptag667904020\blipupi71{\*\blipuid 27cf68149ca99ab95f958a7b62da888e}010009000003dd02000006001202000000001202000026060f001a04574d464301000000000001003e050000000001000000f803000000000000f80300000100
+00006c000000ffffffffffffffff111100002c00000000000000000000003e480000b900000020454d4600000100f80300001d00000003000000000000000000
+000000000000981200009f1a0000ca0000002101000000000000000000000000000023130300f6660400160000000c000000180000000a000000100000000000
+0000000000000900000010000000111100002c000000250000000c0000000e000080250000000c0000000e000080120000000c00000001000000520000007001
+000001000000a4ffffff000000000000000000000000900100000000000004400022430061006c00690062007200690000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000000000001900304e19001000000094511900144f1900fa4e5966945119008c4e
+190010000000fc4f190078511900cc4e5966945119008c4e1900200000008a790c5f8c4e19009451190020000000ffffffffdc008900057a0c5fffffffffffff
+0180ffff01809f020180ffffffff00420000000800000008000018f18d1001000000000000005802000025000000372e90010000020f0502020204030204ff02
+00e0ffac004001000000000000009f01000000000000430061006c006900620072000000000020ebf70486d759667a68466dbc008900306c8000c04e19009832
+055f1f00000001000000fc4e1900fc4e1900907b035f1f000000244f1900dc0089006476000800000000250000000c00000001000000250000000c0000000100
+0000250000000c00000001000000180000000c0000000000000254000000540000000000000000000000350000002b000000010000005fcc87405eb387400000
+000057000000010000004c000000040000000000000000000000111100002c00000050000000200000003600000046000000280000001c000000474449430200
+0000ffffffffffffffff111100002c000000000000002100000008000000620000000c0000000100000024000000240000000000803e00000000000000000000
+803e000000000000000002000000270000001800000002000000000000003333330000000000250000000c00000002000000250000000c000000080000805600
+000030000000ffffffffffffffff111100002c00000005000000fefffefffeffad004144ad004144fefffefffeff250000000c00000007000080250000000c00
+000000000080240000002400000000008040000000000000000000008040000000000000000002000000220000000c000000ffffffff46000000140000000800
+00004744494303000000250000000c0000000e000080250000000c0000000e0000800e0000001400000000000000100000001400000004000000030108000500
+00000b0200000000050000000c0205000c02040000002e0118001c000000fb020200010000000000bc02000000000102022253797374656d003f00003f3f0000
+0000000000000000000001003f3f3f3f3f00040000002d010000040000002d01000004000000020101001c000000fb02f5ff0000000000009001000000000440
+002243616c6962726900000000000000000000000000000000000000000000000000040000002d010100040000002d010100040000002d010100050000000902
+000000020d000000320a0a00000001000400000000000e02050020000600030000001e0007000000fc020000333333000000040000002d01020008000000fa02
+050000000000ffffff00040000002d0103000e0000002403050000000000000005000e0205000e0200000000000008000000fa0200000000000000000000040000002d01040007000000fc020000ffffff000000040000002d010500040000002701ffff040000002d010000040000002d010000030000000000}}{
+\rtlch\fcs1 \af0 \ltrch\fcs0 \f318\ul\insrsid5249973 
+\par }\pard \ltrpar\s22\ql \li-284\ri0\widctlpar\tx3075\tx3119\tx7655\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin-284\itap0\pararsid16338741 {\rtlch\fcs1 \af0\afs14 \ltrch\fcs0 \b\f318\fs14\insrsid2504751 Example footer}{\rtlch\fcs1 \af0\afs14 
+\ltrch\fcs0 \f318\fs14\insrsid5249973 
+\par }}{\headerf \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
+\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
+\par }}{\footerf \ltrpar \pard\plain \ltrpar\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
+\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
+\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}
+{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8
+\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\ltrrow\trowd \irow0\irowband0\ltrrow
+\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
+\cltxlrtb\clftsWidth3\clwWidth5508\clshdrawnil \cellx5400\clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl \cltxlrtb\clftsWidth3\clwWidth4912\clshdrawnil \cellx10312\pard\plain \ltrpar\ql \li0\ri0\widctlpar\intbl
+\tx5670\tx8222\wrapdefault\faauto\rin0\lin0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\cf1\lang2057\langfe2057\langfenp2057\insrsid6695929 \cell 
+}\pard \ltrpar\qc \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid16338741 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\lang2057\langfe2057\langfenp2057\insrsid6695929 \cell }\pard \ltrpar
+\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1 \ltrch\fcs0 \f1\fs20\insrsid6695929 \trowd \irow0\irowband0\ltrrow
+\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
+\cltxlrtb\clftsWidth3\clwWidth5508\clshdrawnil \cellx5400\clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl \cltxlrtb\clftsWidth3\clwWidth4912\clshdrawnil \cellx10312\row \ltrrow}\trowd \irow1\irowband1\lastrow \ltrrow
+\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
+\cltxlrtb\clftsWidth3\clwWidth10420\clshdrawnil \cellx10312\pard \ltrpar\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs8\lang2057\langfe2057\langfenp2057\insrsid6695929 
+\cell }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1 \ltrch\fcs0 \f1\fs20\insrsid6695929 \trowd \irow1\irowband1\lastrow \ltrrow
+\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
+\cltxlrtb\clftsWidth3\clwWidth10420\clshdrawnil \cellx10312\row }\pard \ltrpar\qj \li0\ri0\widctlpar\tx0\wrapdefault\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0\pararsid15429861 {\rtlch\fcs1 \af0 \ltrch\fcs0 \f171\fs20\insrsid2504751 
+Example text to extract from RTF.}{\rtlch\fcs1 \af0 \ltrch\fcs0 \f171\fs20\insrsid15429861 
+\par 
+\par 
+\par }{\rtlch\fcs1 \af0 \ltrch\fcs0 \b\f171\fs20\ul\insrsid15429861 
+\par }{\rtlch\fcs1 \af0 \ltrch\fcs0 \b\f171\fs20\ul\insrsid15429861\charrsid4947815 
+\par }{\*\themedata 504b030414000600080000002100828abc13fa0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb6ac3301045f785fe83d0b6d8
+72ba28a5d8cea249777d2cd20f18e4b12d6a8f843409c9df77ecb850ba082d74231062ce997b55ae8fe3a00e1893f354e9555e6885647de3a8abf4fbee29bbd7
+2a3150038327acf409935ed7d757e5ee14302999a654e99e393c18936c8f23a4dc072479697d1c81e51a3b13c07e4087e6b628ee8cf5c4489cf1c4d075f92a0b
+44d7a07a83c82f308ac7b0a0f0fbf90c2480980b58abc733615aa2d210c2e02cb04430076a7ee833dfb6ce62e3ed7e14693e8317d8cd0433bf5c60f53fea2fe7
+065bd80facb647e9e25c7fc421fd2ddb526b2e9373fed4bb902e182e97b7b461e6bfad3f010000ffff0300504b030414000600080000002100a5d6a7e7c00000
+00360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4fc7060abb08
+84a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b63095120f88d94fbc
+52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462a1a82fe353
+bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f7468656d652f7468
+656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b4b0d592c9c
+070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b4757e8d3f7
+29e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f7468656d65
+312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87615b8116d8
+a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad79482a9c04
+98f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b5d8a314d3c
+94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab999fb7b471
+7509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9699640f671
+9e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd5868b37a088d1
+e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d60cf03ac1a5
+193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f9e7ef3f2d1
+17d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be15c308d3f2
+8acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a99793849c26ae6
+6252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d32a423279a
+668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2af074481847
+bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86e877f0034e
+16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb44f95d843b
+5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a6409fb44d0
+8741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c3d9058edf2
+c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db02565e85f3b966
+0d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276b9f7dec44b
+7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8c33585b5fb
+9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e51440ca2e0
+088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95b21be5ceaf
+8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff6dce591a26
+ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec69ffb9e65d0
+28d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239b75a5bb1e6
+345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a44959d366ad93
+b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e82db8df9f30
+254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f74
+68656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f24
+51eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e3198
+720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528
+a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100828abc13fa0000001c0200001300000000000000000000000000
+000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b000000000000000000000000
+002b0100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c00000000000000000000000000140200007468
+656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b000016000000000000000000
+00000000d10200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b010000270000000000
+00000000000000009b0900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000960a00000000}
+{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d
+617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169
+6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363
+656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e}
+{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;
+\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdpriority39 \lsdlocked0 toc 1;\lsdpriority39 \lsdlocked0 toc 2;\lsdpriority39 \lsdlocked0 toc 3;\lsdpriority39 \lsdlocked0 toc 4;
+\lsdpriority39 \lsdlocked0 toc 5;\lsdpriority39 \lsdlocked0 toc 6;\lsdpriority39 \lsdlocked0 toc 7;\lsdpriority39 \lsdlocked0 toc 8;\lsdpriority39 \lsdlocked0 toc 9;\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdpriority1 \lsdlocked0 Default Paragraph Font;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid;
+\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography;
+\lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 0105000002000000180000004d73786d6c322e534158584d4c5265616465722e352e3000000000000000000000060000
+d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff090006000000000000000000000001000000010000000000000000100000feffffff00000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+fffffffffffffffffdfffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffffffffffffec69d9888b8b3d4c859eaf6cd158be0f0000000000000000000000004077
+60480b5dd101feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000105000000000000}}
\ No newline at end of file


[13/13] tika git commit: Merge remote-tracking branch 'origin/2.x' into 2.x

Posted by ta...@apache.org.
Merge remote-tracking branch 'origin/2.x' into 2.x


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e1498edb
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e1498edb
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e1498edb

Branch: refs/heads/2.x
Commit: e1498edbbb49c40c0d67c46e469c3db18012e0ae
Parents: aa5f60d cf96323
Author: tballison <ta...@mitre.org>
Authored: Mon Mar 21 21:19:03 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Mar 21 21:19:03 2016 -0400

----------------------------------------------------------------------
 .../java/org/apache/tika/parser/external/ExternalParser.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------



[08/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 88205ca..b95d8a2 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -150,32 +150,6 @@
       <plugins>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-dependency-plugin</artifactId>
-          <version>2.10</version>
-          <executions>
-            <execution>
-              <id>unpack</id>
-              <phase>compile</phase>
-              <goals>
-                <goal>unpack</goal>
-              </goals>
-              <configuration>
-                <artifactItems>
-                  <artifactItem>
-                    <groupId>${project.groupId}</groupId>
-                    <artifactId>tika-test-resources</artifactId>
-                    <version>${project.version}</version>
-                    <type>test-jar</type>
-                    <overWrite>true</overWrite>
-                    <outputDirectory>${project.build.testOutputDirectory}</outputDirectory>
-                  </artifactItem>
-                </artifactItems>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-jar-plugin</artifactId>
           <executions>
             <execution>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java b/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
index fff644a..63e75a4 100644
--- a/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
@@ -16,26 +16,26 @@
  */
 package org.apache.tika.parser.ner;
 
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser;
-import org.apache.tika.parser.ner.regex.RegexNERecogniser;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.HashSet;
 
-import static org.junit.Assert.assertTrue;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser;
+import org.apache.tika.parser.ner.regex.RegexNERecogniser;
+import org.junit.Test;
 
 /**
  *Test case for {@link NamedEntityParser}
  */
 public class NamedEntityParserTest {
 
-    public static final String CONFIG_FILE = "tika-config.xml";
+    public static final String CONFIG_FILE = "tika-config-for-ner.xml";
 
     @Test
     public void testParse() throws Exception {

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/regex/RegexNERecogniserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/regex/RegexNERecogniserTest.java b/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/regex/RegexNERecogniserTest.java
index 57c2162..257fea8 100644
--- a/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/regex/RegexNERecogniserTest.java
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/ner/regex/RegexNERecogniserTest.java
@@ -16,11 +16,7 @@
  */
 package org.apache.tika.parser.ner.regex;
 
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ner.NamedEntityParser;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
@@ -28,7 +24,12 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
-import static org.junit.Assert.assertTrue;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ner.NamedEntityParser;
+import org.apache.tika.parser.ner.NamedEntityParserTest;
+import org.junit.Test;
 
 public class RegexNERecogniserTest {
 
@@ -38,7 +39,7 @@ public class RegexNERecogniserTest {
         String text = "Hey, Lets meet on this Sunday or MONDAY because i am busy on Saturday";
         System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, RegexNERecogniser.class.getName());
 
-        Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml")));
+        Tika tika = new Tika(new TikaConfig(NamedEntityParserTest.class.getResourceAsStream("tika-config-for-ner.xml")));
         Metadata md = new Metadata();
         tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md);
 

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
new file mode 100644
index 0000000..e6fa39e
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+WEEK_DAY=(?i)((sun)|(mon)|(tues)|(thurs)|(fri)|((sat)(ur)?))(day)?
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config-for-ner.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config-for-ner.xml b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config-for-ner.xml
new file mode 100644
index 0000000..267c399
--- /dev/null
+++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config-for-ner.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.ner.NamedEntityParser">
+            <mime>text/plain</mime>
+            <mime>text/html</mime>
+            <mime>application/xhtml+xml</mime>
+        </parser>
+    </parsers>
+
+</properties>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-database-module/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-database-module/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java b/tika-parser-modules/tika-parser-database-module/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
index 7ea27fa..d394c61 100644
--- a/tika-parser-modules/tika-parser-database-module/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
+++ b/tika-parser-modules/tika-parser-database-module/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
@@ -20,10 +20,14 @@ package org.apache.tika.parser.jdbc;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 
+import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -41,13 +45,27 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.ToXMLContentHandler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
 public class SQLite3ParserTest extends TikaTest {
     private final static String TEST_FILE_NAME = "testSqlite3b.db";
-    private final static String TEST_FILE1 = "/test-documents/" + TEST_FILE_NAME;
+    static Path tmp = null;
+    @BeforeClass
+    public static void createTMPFile() throws IOException {
+        tmp = Files.createTempFile("sqlite-", "");
+        Files.copy(
+                TikaTest.class.getClassLoader().getResourceAsStream("test-documents/"+TEST_FILE_NAME),
+                tmp, StandardCopyOption.REPLACE_EXISTING);
+
+    }
+
+    @AfterClass
+    public static void deleteTMPFile() throws IOException {
+        Files.delete(tmp);
+    }
 
     @Test
     public void testBasic() throws Exception {
@@ -56,18 +74,20 @@ public class SQLite3ParserTest extends TikaTest {
         //test different types of input streams
         //actual inputstream, memory buffered bytearray and literal file
         InputStream[] streams = new InputStream[3];
-        streams[0] = getResourceAsStream(TEST_FILE1);
+        streams[0] = getTestDocumentAsStream(TEST_FILE_NAME);
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
-        IOUtils.copy(getResourceAsStream(TEST_FILE1), bos);
+        IOUtils.copy(getTestDocumentAsStream(TEST_FILE_NAME), bos);
         streams[1] = new ByteArrayInputStream(bos.toByteArray());
-        streams[2] = TikaInputStream.get(getResourceAsFile(TEST_FILE1));
+        streams[2] = TikaInputStream.get(tmp);
         int tests = 0;
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, p);
         for (InputStream stream : streams) {
             Metadata metadata = new Metadata();
             metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
             //1) getXML closes the stream
             //2) getXML runs recursively on the contents, so the embedded docs should show up
-            XMLResult result = getXML(stream, p, metadata);
+            XMLResult result = getXML(stream, p, metadata, context);
             String x = result.xml;
             //first table name
             assertContains("<table name=\"my_table1\"><thead><tr>\t<th>INT_COL</th>", x);
@@ -106,7 +126,7 @@ public class SQLite3ParserTest extends TikaTest {
         ContentHandler handler = new BodyContentHandler(-1);
         ParseContext ctx = new ParseContext();
         ctx.set(Parser.class, p);
-        try (InputStream stream = getResourceAsStream(TEST_FILE1)) {
+        try (InputStream stream = getTestDocumentAsStream(TEST_FILE_NAME)) {
             p.parse(stream, handler, metadata, ctx);
         }
         String s = handler.toString();
@@ -118,14 +138,11 @@ public class SQLite3ParserTest extends TikaTest {
     //to handle embedded documents
     @Test
     public void testNotAddingEmbeddedParserToParseContext() throws Exception {
-        Parser p = new AutoDetectParser();
 
-        InputStream is = getResourceAsStream(TEST_FILE1);
         Metadata metadata = new Metadata();
         metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
-        ContentHandler handler = new ToXMLContentHandler();
-        p.parse(is, handler, metadata, new ParseContext());
-        String xml = handler.toString();
+        XMLResult r = getXML(TEST_FILE_NAME, new AutoDetectParser(), new Metadata(), new ParseContext());
+        String xml = r.xml;
         //just includes headers for embedded documents
         assertContains("<table name=\"my_table1\"><thead><tr>", xml);
         assertContains("<td><span type=\"blob\" column_name=\"BYTES_COL\" row_number=\"0\"><div class=\"package-entry\"><h1>BYTES_COL_0.doc</h1>", xml);
@@ -143,7 +160,7 @@ public class SQLite3ParserTest extends TikaTest {
         RecursiveParserWrapper wrapper =
                 new RecursiveParserWrapper(p, new BasicContentHandlerFactory(
                         BasicContentHandlerFactory.HANDLER_TYPE.BODY, -1));
-        InputStream is = getResourceAsStream(TEST_FILE1);
+        InputStream is = getTestDocumentAsStream(TEST_FILE_NAME);
         Metadata metadata = new Metadata();
         metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
         wrapper.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
@@ -176,7 +193,7 @@ public class SQLite3ParserTest extends TikaTest {
 
         ParserContainerExtractor ex = new ParserContainerExtractor();
         ByteCopyingHandler byteCopier = new ByteCopyingHandler();
-        InputStream is = getResourceAsStream(TEST_FILE1);
+        InputStream is = getTestDocumentAsStream(TEST_FILE_NAME);
         Metadata metadata = new Metadata();
         metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
         ex.extract(TikaInputStream.get(is), ex, byteCopier);
@@ -217,9 +234,12 @@ public class SQLite3ParserTest extends TikaTest {
         //4x word files, two docs and two docxs
         //4x png files, the same image embedded in each of the doc and docx
 
+        //not clear why we get an exception on reset if we try
+        //to get the test file directly
         ParserContainerExtractor ex = new ParserContainerExtractor();
         InputStreamResettingHandler byteCopier = new InputStreamResettingHandler();
-        InputStream is = getResourceAsStream(TEST_FILE1);
+        InputStream is = new BufferedInputStream(
+                getResourceAsStream("/test-documents/"+TEST_FILE_NAME));
         Metadata metadata = new Metadata();
         metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
         ex.extract(TikaInputStream.get(is), ex, byteCopier);

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
index 90a3c1a..5f53870 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
@@ -20,11 +20,8 @@ import static java.nio.charset.StandardCharsets.ISO_8859_1;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URL;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
@@ -34,6 +31,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.regex.Pattern;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
@@ -45,7 +43,7 @@ import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.SAXException;
 
-public class TestChmExtraction {
+public class TestChmExtraction extends TikaTest {
 
     private final Parser parser = new ChmParser();
 
@@ -196,12 +194,19 @@ public class TestChmExtraction {
     
     @Test
     public void test_TIKA_1446() throws Exception {
-        URL chmDir = TestChmExtraction.class.getResource("/test-documents/chm/");
-        File chmFolder = new File(chmDir.toURI());
-        for (String fileName : chmFolder.list()) {
-            File file = new File(chmFolder, fileName);
-            InputStream stream = new FileInputStream(file);
-            testingChm(stream);
+        String[] chemFiles = {
+                "admin.chm",
+                "cmak_ops.CHM",
+                "comexp.CHM",
+                "gpedit.CHM",
+                "IMJPCL.CHM",
+                "IMJPCLE.CHM",
+                "IMTCEN.CHM",
+                "tcpip.CHM",
+                "wmicontrol.CHM"
+        };
+        for (String fileName : chemFiles) {
+            testingChm(getTestDocumentAsStream("chm/"+fileName));
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 4f5bfcd..4b92e88 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -16,8 +16,6 @@
  */
 package org.apache.tika.parser.microsoft;
 
-import static org.apache.tika.TikaTest.assertContains;
-import static org.apache.tika.TikaTest.assertNotContained;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -25,6 +23,7 @@ import static org.junit.Assert.fail;
 import java.io.InputStream;
 import java.util.Locale;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.EncryptedDocumentException;
@@ -41,155 +40,139 @@ import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
-public class ExcelParserTest {
+public class ExcelParserTest extends TikaTest {
     @Test
     @SuppressWarnings("deprecation") // Checks legacy Tika-1.0 style metadata keys
     public void testExcelParser() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL.xls")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            new OfficeParser().parse(input, handler, metadata, context);
-
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
-            assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
-
-            // Mon Oct 01 17:13:56 BST 2007
-            assertEquals("2007-10-01T16:13:56Z", metadata.get(TikaCoreProperties.CREATED));
-            assertEquals("2007-10-01T16:13:56Z", metadata.get(Metadata.CREATION_DATE));
 
-            // Mon Oct 01 17:31:43 BST 2007
-            assertEquals("2007-10-01T16:31:43Z", metadata.get(TikaCoreProperties.MODIFIED));
-            assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.US);
+        XMLResult r = getXML("testEXCEL.xls", new OfficeParser(), new Metadata(), context);
+
+        assertEquals(
+                "application/vnd.ms-excel",
+                r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Simple Excel document", r.metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Keith Bennett", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Keith Bennett", r.metadata.get(Metadata.AUTHOR));
+
+        // Mon Oct 01 17:13:56 BST 2007
+        assertEquals("2007-10-01T16:13:56Z", r.metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("2007-10-01T16:13:56Z", r.metadata.get(Metadata.CREATION_DATE));
+
+        // Mon Oct 01 17:31:43 BST 2007
+        assertEquals("2007-10-01T16:31:43Z", r.metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2007-10-01T16:31:43Z", r.metadata.get(Metadata.DATE));
+
+        String content = r.xml;
+        assertContains("Sample Excel Worksheet", content);
+        assertContains("Numbers and their Squares", content);
+        assertContains("<tr>\t<td />\t<td>Number</td>\t<td>Square", content);
+        assertContains("9", content);
+        assertNotContained("9.0", content);
+        assertContains("196", content);
+        assertNotContained("196.0", content);
 
-            String content = handler.toString();
-            assertContains("Sample Excel Worksheet", content);
-            assertContains("Numbers and their Squares", content);
-            assertContains("\t\tNumber\tSquare", content);
-            assertContains("9", content);
-            assertNotContained("9.0", content);
-            assertContains("196", content);
-            assertNotContained("196.0", content);
-        }
     }
 
     @Test
     public void testExcelParserFormatting() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL-formats.xls")) {
-            Metadata metadata = new Metadata();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            ContentHandler handler = new BodyContentHandler();
-            new OfficeParser().parse(input, handler, metadata, context);
-
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.US);
+        XMLResult r = getXML("testEXCEL-formats.xls", new OfficeParser(), new Metadata(), context);
+
+        assertEquals(
+                "application/vnd.ms-excel",
+                r.metadata.get(Metadata.CONTENT_TYPE));
+
+        String content = r.xml;
+
+        // Number #,##0.00
+        assertContains("1,599.99", content);
+        assertContains("-1,599.99", content);
+
+        // Currency $#,##0.00;[Red]($#,##0.00)
+        assertContains("$1,599.99", content);
+        assertContains("($1,599.99)", content);
+
+        // Scientific 0.00E+00
+        // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98+E08
+        assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
+        assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
+
+        // Percentage.
+        assertContains("2.50%", content);
+        // Excel rounds up to 3%, but that requires Java 1.6 or later
+        if (System.getProperty("java.version").startsWith("1.5")) {
+            assertContains("2%", content);
+        } else {
+            assertContains("3%", content);
+        }
 
-            String content = handler.toString();
+        // Time Format: h:mm
+        assertContains("6:15", content);
+        assertContains("18:15", content);
 
-            // Number #,##0.00
-            assertContains("1,599.99", content);
-            assertContains("-1,599.99", content);
-
-            // Currency $#,##0.00;[Red]($#,##0.00)
-            assertContains("$1,599.99", content);
-            assertContains("($1,599.99)", content);
-
-            // Scientific 0.00E+00
-            // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98+E08
-            assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
-            assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
-
-            // Percentage.
-            assertContains("2.50%", content);
-            // Excel rounds up to 3%, but that requires Java 1.6 or later
-            if (System.getProperty("java.version").startsWith("1.5")) {
-                assertContains("2%", content);
-            } else {
-                assertContains("3%", content);
-            }
+        // Date Format: d-mmm-yy
+        assertContains("17-May-07", content);
 
-            // Time Format: h:mm
-            assertContains("6:15", content);
-            assertContains("18:15", content);
+        // Date Format: m/d/yy
+        assertContains("10/3/09", content);
 
-            // Date Format: d-mmm-yy
-            assertContains("17-May-07", content);
+        // Date/Time Format: m/d/yy h:mm
+        assertContains("1/19/08 4:35", content);
 
-            // Date Format: m/d/yy
-            assertContains("10/3/09", content);
+        // Fraction (2.5): # ?/?
+        assertContains("2 1/2", content);
 
-            // Date/Time Format: m/d/yy h:mm
-            assertContains("1/19/08 4:35", content);
 
-            // Fraction (2.5): # ?/?
-            assertContains("2 1/2", content);
+        // Below assertions represent outstanding formatting issues to be addressed
+        // they are included to allow the issues to be progressed with the Apache POI
+        // team - See TIKA-103.
 
+        /*************************************************************************
+         // Custom Number (0 "dollars and" .00 "cents")
+         assertContains("19 dollars and .99 cents", content);
 
-            // Below assertions represent outstanding formatting issues to be addressed
-            // they are included to allow the issues to be progressed with the Apache POI
-            // team - See TIKA-103.
+         // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
+         assertContains("At 4:20 AM on Thursday May 17, 2007", content);
+         **************************************************************************/
 
-            /*************************************************************************
-             // Custom Number (0 "dollars and" .00 "cents")
-             assertContains("19 dollars and .99 cents", content);
 
-             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
-             assertContains("At 4:20 AM on Thursday May 17, 2007", content);
-             **************************************************************************/
-
-        }
     }
 
     @Test
     public void testExcelParserPassword() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL_protected_passtika.xls")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            new OfficeParser().parse(input, handler, metadata, context);
+        try {
+            XMLResult r = getXML("testEXCEL_protected_passtika.xls");
             fail("Document is encrypted, shouldn't parse");
         } catch (EncryptedDocumentException e) {
             // Good
         }
 
         // Try again, this time with the password
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL_protected_passtika.xls")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            context.set(PasswordProvider.class, new PasswordProvider() {
-                @Override
-                public String getPassword(Metadata metadata) {
-                    return "tika";
-                }
-            });
-            new OfficeParser().parse(input, handler, metadata, context);
-
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
-
-            assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Antoni", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("2011-11-25T09:52:48Z", metadata.get(TikaCoreProperties.CREATED));
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.US);
+        context.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "tika";
+            }
+        });
+        XMLResult r = getXML("testEXCEL_protected_passtika.xls", new OfficeParser(), new Metadata(), context);
+
+        assertEquals(
+                "application/vnd.ms-excel",
+                r.metadata.get(Metadata.CONTENT_TYPE));
+
+        assertEquals(null, r.metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Antoni", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("2011-11-25T09:52:48Z", r.metadata.get(TikaCoreProperties.CREATED));
+
+        String content = r.xml;
+        assertContains("This is an Encrypted Excel spreadsheet", content);
+        assertNotContained("9.0", content);
 
-            String content = handler.toString();
-            assertContains("This is an Encrypted Excel spreadsheet", content);
-            assertNotContained("9.0", content);
-        }
     }
 
     /**
@@ -197,70 +180,48 @@ public class ExcelParserTest {
      */
     @Test
     public void testExcelParserCharts() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL-charts.xls")) {
-            Metadata metadata = new Metadata();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            ContentHandler handler = new BodyContentHandler();
-            new OfficeParser().parse(input, handler, metadata, context);
 
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
+        XMLResult r = getXML("testEXCEL-charts.xls", new OfficeParser());
+        assertEquals(
+                "application/vnd.ms-excel",
+                r.metadata.get(Metadata.CONTENT_TYPE));
 
-            String content = handler.toString();
+        String content = r.xml;
+
+        // The first sheet has a pie chart
+        assertContains("charttabyodawg", content);
+        assertContains("WhamPuff", content);
+
+        // The second sheet has a bar chart and some text
+        assertContains("Sheet1", content);
+        assertContains("Test Excel Spreasheet", content);
+        assertContains("foo", content);
+        assertContains("bar", content);
+        assertContains("fizzlepuff", content);
+        assertContains("whyaxis", content);
+        assertContains("eksaxis", content);
+
+        // The third sheet has some text
+        assertContains("Sheet2", content);
+        assertContains("dingdong", content);
 
-            // The first sheet has a pie chart
-            assertContains("charttabyodawg", content);
-            assertContains("WhamPuff", content);
-
-            // The second sheet has a bar chart and some text
-            assertContains("Sheet1", content);
-            assertContains("Test Excel Spreasheet", content);
-            assertContains("foo", content);
-            assertContains("bar", content);
-            assertContains("fizzlepuff", content);
-            assertContains("whyaxis", content);
-            assertContains("eksaxis", content);
-
-            // The third sheet has some text
-            assertContains("Sheet2", content);
-            assertContains("dingdong", content);
-        }
     }
 
     @Test
     public void testJXL() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/jxl.xls")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler(-1);
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            new OfficeParser().parse(input, handler, metadata, context);
 
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
-            String content = handler.toString();
-            assertContains("Number Formats", content);
-        }
+        XMLResult r = getXML("jxl.xls", new OfficeParser());
+        assertEquals(
+                "application/vnd.ms-excel",
+                r.metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Number Formats", r.xml);
+
     }
 
     @Test
     public void testWorksSpreadsheet70() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testWORKSSpreadsheet7.0.xlr")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler(-1);
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            new OfficeParser().parse(input, handler, metadata, context);
-
-            String content = handler.toString();
-            assertContains("Microsoft Works", content);
-        }
+        assertContains("Microsoft Works",
+                getXML("testWORKSSpreadsheet7.0.xlr", new OfficeParser()).xml);
     }
 
     /**
@@ -278,8 +239,7 @@ public class ExcelParserTest {
 
         // Should be detected correctly
         MediaType type;
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL.xlsb")) {
+        try (InputStream input = getTestDocumentAsStream("testEXCEL.xlsb")) {
             type = detector.detect(input, m);
             assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
         }
@@ -291,15 +251,8 @@ public class ExcelParserTest {
         assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
 
         // AutoDetectParser doesn't break on it
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
-            ContentHandler handler = new BodyContentHandler(-1);
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            parser.parse(input, handler, m, context);
+        assertContains("<body />", getXML("testEXCEL.xlsb").xml);
 
-            String content = handler.toString();
-            assertEquals("", content);
-        }
     }
 
     /**
@@ -315,7 +268,7 @@ public class ExcelParserTest {
         // First try detection of Excel 5
         m = new Metadata();
         m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+        try (InputStream input = getTestDocumentAsStream("testEXCEL_5.xls")) {
             type = detector.detect(input, m);
             assertEquals("application/vnd.ms-excel", type.toString());
         }
@@ -323,7 +276,7 @@ public class ExcelParserTest {
         // Now Excel 95
         m = new Metadata();
         m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+        try (InputStream input = getTestDocumentAsStream("testEXCEL_95.xls")) {
             type = detector.detect(input, m);
             assertEquals("application/vnd.ms-excel", type.toString());
         }
@@ -337,7 +290,7 @@ public class ExcelParserTest {
 
         // Parse the Excel 5 file
         m = new Metadata();
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
+        try (InputStream input = getTestDocumentAsStream("testEXCEL_5.xls")) {
             ContentHandler handler = new BodyContentHandler(-1);
             ParseContext context = new ParseContext();
             context.set(Locale.class, Locale.US);
@@ -364,7 +317,7 @@ public class ExcelParserTest {
 
         // Parse the Excel 95 file
         m = new Metadata();
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
+        try (InputStream input = getTestDocumentAsStream("testEXCEL_95.xls")) {
             ContentHandler handler = new BodyContentHandler(-1);
             ParseContext context = new ParseContext();
             context.set(Locale.class, Locale.US);
@@ -388,16 +341,11 @@ public class ExcelParserTest {
      */
     @Test
     public void testCustomProperties() throws Exception {
-        Metadata metadata = new Metadata();
-
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL_custom_props.xls")) {
-            ContentHandler handler = new BodyContentHandler(-1);
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.US);
-            new OfficeParser().parse(input, handler, metadata, context);
-        }
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.US);
 
+        XMLResult r = getXML("testEXCEL_custom_props.xls", new OfficeParser(), new Metadata(), context);
+        Metadata metadata = r.metadata;
         assertEquals("application/vnd.ms-excel", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("", metadata.get(TikaCoreProperties.CREATOR));
         assertEquals("", metadata.get(TikaCoreProperties.MODIFIER));
@@ -413,31 +361,30 @@ public class ExcelParserTest {
 
 	@Test
     public void testHeaderAndFooterExtraction() throws Exception {
-        try (InputStream input = ExcelParserTest.class.getResourceAsStream(
-                "/test-documents/testEXCEL_headers_footers.xls")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-            context.set(Locale.class, Locale.UK);
-            new OfficeParser().parse(input, handler, metadata, context);
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.UK);
+
+        XMLResult r = getXML("testEXCEL_headers_footers.xls", new OfficeParser(),
+                new Metadata(), context);
+
+        Metadata metadata = r.metadata;
+        assertEquals(
+                "application/vnd.ms-excel",
+                metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Internal spreadsheet", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Aeham Abushwashi", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Aeham Abushwashi", metadata.get(Metadata.AUTHOR));
+
+        String content = r.xml;
+        assertContains("John Smith1", content);
+        assertContains("John Smith50", content);
+        assertContains("1 Corporate HQ", content);
+        assertContains("Header - Corporate Spreadsheet", content);
+        assertContains("Header - For Internal Use Only", content);
+        assertContains("Header - Author: John Smith", content);
+        assertContains("Footer - Corporate Spreadsheet", content);
+        assertContains("Footer - For Internal Use Only", content);
+        assertContains("Footer - Author: John Smith", content);
 
-            assertEquals(
-                    "application/vnd.ms-excel",
-                    metadata.get(Metadata.CONTENT_TYPE));
-            assertEquals("Internal spreadsheet", metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Aeham Abushwashi", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("Aeham Abushwashi", metadata.get(Metadata.AUTHOR));
-
-            String content = handler.toString();
-            assertContains("John Smith1", content);
-            assertContains("John Smith50", content);
-            assertContains("1 Corporate HQ", content);
-            assertContains("Header - Corporate Spreadsheet", content);
-            assertContains("Header - For Internal Use Only", content);
-            assertContains("Header - Author: John Smith", content);
-            assertContains("Footer - Corporate Spreadsheet", content);
-            assertContains("Footer - For Internal Use Only", content);
-            assertContains("Footer - Author: John Smith", content);
-        }
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index 8a7c202..3cfda82 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -17,9 +17,11 @@
 package org.apache.tika.parser.odf;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
 import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.io.TikaInputStream;
@@ -27,7 +29,6 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.OfficeOpenXMLCore;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.opendocument.OpenOfficeParser;
@@ -50,270 +51,235 @@ public class ODFParserTest extends TikaTest {
     @Test
     public void testOO3() throws Exception {
        for (Parser parser : getParsers()) {
-           try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                   "/test-documents/testODFwithOOo3.odt")) {
-               Metadata metadata = new Metadata();
-               ContentHandler handler = new BodyContentHandler();
-               parser.parse(input, handler, metadata, new ParseContext());
-
-               assertEquals(
-                       "application/vnd.oasis.opendocument.text",
-                       metadata.get(Metadata.CONTENT_TYPE));
-
-               String content = handler.toString();
-               assertContains("Tika is part of the Lucene project.", content);
-               assertContains("Solr", content);
-               assertContains("one embedded", content);
-               assertContains("Rectangle Title", content);
-               assertContains("a blue background and dark border", content);
-           }
+           XMLResult r = getXML("testODFwithOOo3.odt", parser);
+           assertEquals(
+                   "application/vnd.oasis.opendocument.text",
+                   r.metadata.get(Metadata.CONTENT_TYPE));
+
+           String content = r.xml;
+           assertContains("Tika is part of the Lucene project.", content);
+           assertContains("Solr", content);
+           assertContains("one embedded", content);
+           assertContains("Rectangle Title", content);
+           assertContains("a blue background and dark border", content);
+
        }
     }
 
     @Test
     public void testOO2() throws Exception {
-       for (Parser parser : getParsers()) {
-           try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                   "/test-documents/testOpenOffice2.odt")) {
-               Metadata metadata = new Metadata();
-               ContentHandler handler = new BodyContentHandler();
-               parser.parse(input, handler, metadata, new ParseContext());
-
-               assertEquals(
-                       "application/vnd.oasis.opendocument.text",
-                       metadata.get(Metadata.CONTENT_TYPE));
-               assertEquals("en-US", metadata.get(Metadata.LANGUAGE));
-               assertEquals("PT1M7S", metadata.get(Metadata.EDIT_TIME));
-               assertEquals(
-                       "NeoOffice/2.2$Unix OpenOffice.org_project/680m18$Build-9161",
-                       metadata.get("generator"));
-
-               // Check date metadata, both old-style and new-style
-               assertEquals("2007-09-14T11:07:10", metadata.get(TikaCoreProperties.MODIFIED));
-               assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.MODIFIED));
-               assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.DATE));
-               assertEquals("2007-09-14T11:06:08", metadata.get(TikaCoreProperties.CREATED));
-               assertEquals("2007-09-14T11:06:08", metadata.get(Metadata.CREATION_DATE));
-
-               // Check the document statistics
-               assertEquals("1", metadata.get(Office.PAGE_COUNT));
-               assertEquals("1", metadata.get(Office.PARAGRAPH_COUNT));
-               assertEquals("14", metadata.get(Office.WORD_COUNT));
-               assertEquals("78", metadata.get(Office.CHARACTER_COUNT));
-               assertEquals("0", metadata.get(Office.TABLE_COUNT));
-               assertEquals("0", metadata.get(Office.OBJECT_COUNT));
-               assertEquals("0", metadata.get(Office.IMAGE_COUNT));
-
-               // Check the Tika-1.0 style document statistics
-               assertEquals("1", metadata.get(Metadata.PAGE_COUNT));
-               assertEquals("1", metadata.get(Metadata.PARAGRAPH_COUNT));
-               assertEquals("14", metadata.get(Metadata.WORD_COUNT));
-               assertEquals("78", metadata.get(Metadata.CHARACTER_COUNT));
-               assertEquals("0", metadata.get(Metadata.TABLE_COUNT));
-               assertEquals("0", metadata.get(Metadata.OBJECT_COUNT));
-               assertEquals("0", metadata.get(Metadata.IMAGE_COUNT));
-
-               // Check the very old style statistics (these will be removed shortly)
-               assertEquals("0", metadata.get("nbTab"));
-               assertEquals("0", metadata.get("nbObject"));
-               assertEquals("0", metadata.get("nbImg"));
-               assertEquals("1", metadata.get("nbPage"));
-               assertEquals("1", metadata.get("nbPara"));
-               assertEquals("14", metadata.get("nbWord"));
-               assertEquals("78", metadata.get("nbCharacter"));
-
-               // Custom metadata tags present but without values
-               assertEquals(null, metadata.get("custom:Info 1"));
-               assertEquals(null, metadata.get("custom:Info 2"));
-               assertEquals(null, metadata.get("custom:Info 3"));
-               assertEquals(null, metadata.get("custom:Info 4"));
-
-               String content = handler.toString();
-               assertTrue(content.contains(
-                       "This is a sample Open Office document,"
-                               + " written in NeoOffice 2.2.1 for the Mac."));
-           }
-       }
-   }
-
-   /**
-    * Similar to {@link #testXMLParser()}, but using a different
-    *  OO2 file with different metadata in it
-    */
-    @Test
-    public void testOO2Metadata() throws Exception {
-        try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                "/test-documents/testOpenOffice2.odf")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new OpenDocumentParser().parse(input, handler, metadata);
-
-            assertEquals(
-                    "application/vnd.oasis.opendocument.formula",
-                    metadata.get(Metadata.CONTENT_TYPE));
-            assertEquals(null, metadata.get(TikaCoreProperties.MODIFIED));
-            assertEquals("2006-01-27T11:55:22", metadata.get(Metadata.CREATION_DATE));
-            assertEquals("The quick brown fox jumps over the lazy dog",
-                    metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                    metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                    metadata.get(OfficeOpenXMLCore.SUBJECT));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                    metadata.get(Metadata.SUBJECT));
-            assertEquals("PT0S", metadata.get(Metadata.EDIT_TIME));
-            assertEquals("1", metadata.get("editing-cycles"));
-            assertEquals(
-                    "OpenOffice.org/2.2$Win32 OpenOffice.org_project/680m14$Build-9134",
-                    metadata.get("generator"));
-            assertEquals("Pangram, fox, dog", metadata.get(Metadata.KEYWORDS));
-
-            // User defined metadata
-            assertEquals("Text 1", metadata.get("custom:Info 1"));
-            assertEquals("2", metadata.get("custom:Info 2"));
-            assertEquals("false", metadata.get("custom:Info 3"));
-            assertEquals("true", metadata.get("custom:Info 4"));
-
-            // No statistics present
-            assertEquals(null, metadata.get(Metadata.PAGE_COUNT));
-            assertEquals(null, metadata.get(Metadata.PARAGRAPH_COUNT));
-            assertEquals(null, metadata.get(Metadata.WORD_COUNT));
-            assertEquals(null, metadata.get(Metadata.CHARACTER_COUNT));
-            assertEquals(null, metadata.get(Metadata.TABLE_COUNT));
-            assertEquals(null, metadata.get(Metadata.OBJECT_COUNT));
-            assertEquals(null, metadata.get(Metadata.IMAGE_COUNT));
-            assertEquals(null, metadata.get("nbTab"));
-            assertEquals(null, metadata.get("nbObject"));
-            assertEquals(null, metadata.get("nbImg"));
-            assertEquals(null, metadata.get("nbPage"));
-            assertEquals(null, metadata.get("nbPara"));
-            assertEquals(null, metadata.get("nbWord"));
-            assertEquals(null, metadata.get("nbCharacter"));
-
-            // Note - contents of maths files not currently supported
-            String content = handler.toString();
-            assertEquals("", content);
-        }
-   }
-
-   /**
-    * Similar to {@link #testXMLParser()}, but using an OO3 file
-    */
-    @Test
-   public void testOO3Metadata() throws Exception {
-        try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                "/test-documents/testODFwithOOo3.odt")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new OpenDocumentParser().parse(input, handler, metadata);
-
+        for (Parser parser : getParsers()) {
+            XMLResult r = getXML("testOpenOffice2.odt", parser);
+            Metadata metadata = r.metadata;
             assertEquals(
                     "application/vnd.oasis.opendocument.text",
                     metadata.get(Metadata.CONTENT_TYPE));
-            assertEquals("2009-10-05T21:22:38", metadata.get(TikaCoreProperties.MODIFIED));
-            assertEquals("2009-10-05T19:04:01", metadata.get(TikaCoreProperties.CREATED));
-            assertEquals("2009-10-05T19:04:01", metadata.get(Metadata.CREATION_DATE));
-            assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Test document", metadata.get(OfficeOpenXMLCore.SUBJECT));
-            assertEquals("Test document", metadata.get(Metadata.SUBJECT));
-            assertEquals("A rather complex document", metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("Bart Hanssens", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("Bart Hanssens", metadata.get("initial-creator"));
-            assertEquals("2", metadata.get("editing-cycles"));
-            assertEquals("PT02H03M24S", metadata.get(Metadata.EDIT_TIME));
+            assertEquals("en-US", metadata.get(Metadata.LANGUAGE));
+            assertEquals("PT1M7S", metadata.get(Metadata.EDIT_TIME));
             assertEquals(
-                    "OpenOffice.org/3.1$Unix OpenOffice.org_project/310m19$Build-9420",
+                    "NeoOffice/2.2$Unix OpenOffice.org_project/680m18$Build-9161",
                     metadata.get("generator"));
-            assertEquals("Apache, Lucene, Tika", metadata.get(Metadata.KEYWORDS));
 
-            // User defined metadata
-            assertEquals("Bart Hanssens", metadata.get("custom:Editor"));
-            assertEquals(null, metadata.get("custom:Info 2"));
-            assertEquals(null, metadata.get("custom:Info 3"));
-            assertEquals(null, metadata.get("custom:Info 4"));
+            // Check date metadata, both old-style and new-style
+            assertEquals("2007-09-14T11:07:10", metadata.get(TikaCoreProperties.MODIFIED));
+            assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.MODIFIED));
+            assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.DATE));
+            assertEquals("2007-09-14T11:06:08", metadata.get(TikaCoreProperties.CREATED));
+            assertEquals("2007-09-14T11:06:08", metadata.get(Metadata.CREATION_DATE));
 
             // Check the document statistics
-            assertEquals("2", metadata.get(Office.PAGE_COUNT));
-            assertEquals("13", metadata.get(Office.PARAGRAPH_COUNT));
-            assertEquals("54", metadata.get(Office.WORD_COUNT));
-            assertEquals("351", metadata.get(Office.CHARACTER_COUNT));
+            assertEquals("1", metadata.get(Office.PAGE_COUNT));
+            assertEquals("1", metadata.get(Office.PARAGRAPH_COUNT));
+            assertEquals("14", metadata.get(Office.WORD_COUNT));
+            assertEquals("78", metadata.get(Office.CHARACTER_COUNT));
             assertEquals("0", metadata.get(Office.TABLE_COUNT));
-            assertEquals("2", metadata.get(Office.OBJECT_COUNT));
+            assertEquals("0", metadata.get(Office.OBJECT_COUNT));
             assertEquals("0", metadata.get(Office.IMAGE_COUNT));
 
             // Check the Tika-1.0 style document statistics
-            assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
-            assertEquals("13", metadata.get(Metadata.PARAGRAPH_COUNT));
-            assertEquals("54", metadata.get(Metadata.WORD_COUNT));
-            assertEquals("351", metadata.get(Metadata.CHARACTER_COUNT));
+            assertEquals("1", metadata.get(Metadata.PAGE_COUNT));
+            assertEquals("1", metadata.get(Metadata.PARAGRAPH_COUNT));
+            assertEquals("14", metadata.get(Metadata.WORD_COUNT));
+            assertEquals("78", metadata.get(Metadata.CHARACTER_COUNT));
             assertEquals("0", metadata.get(Metadata.TABLE_COUNT));
-            assertEquals("2", metadata.get(Metadata.OBJECT_COUNT));
+            assertEquals("0", metadata.get(Metadata.OBJECT_COUNT));
             assertEquals("0", metadata.get(Metadata.IMAGE_COUNT));
 
-            // Check the old style statistics (these will be removed shortly)
+            // Check the very old style statistics (these will be removed shortly)
             assertEquals("0", metadata.get("nbTab"));
-            assertEquals("2", metadata.get("nbObject"));
+            assertEquals("0", metadata.get("nbObject"));
             assertEquals("0", metadata.get("nbImg"));
-            assertEquals("2", metadata.get("nbPage"));
-            assertEquals("13", metadata.get("nbPara"));
-            assertEquals("54", metadata.get("nbWord"));
-            assertEquals("351", metadata.get("nbCharacter"));
+            assertEquals("1", metadata.get("nbPage"));
+            assertEquals("1", metadata.get("nbPara"));
+            assertEquals("14", metadata.get("nbWord"));
+            assertEquals("78", metadata.get("nbCharacter"));
+
+            // Custom metadata tags present but without values
+            assertEquals(null, metadata.get("custom:Info 1"));
+            assertEquals(null, metadata.get("custom:Info 2"));
+            assertEquals(null, metadata.get("custom:Info 3"));
+            assertEquals(null, metadata.get("custom:Info 4"));
+
+            assertContains(
+                    "This is a sample Open Office document,"
+                            + " written in NeoOffice 2.2.1 for the Mac.",
+                    r.xml);
 
-            String content = handler.toString();
-            assertTrue(content.contains(
-                    "Apache Tika Tika is part of the Lucene project."
-            ));
         }
    }
 
+   /**
+    * Similar to {@link #testXMLParser()}, but using a different
+    *  OO2 file with different metadata in it
+    */
     @Test
-    public void testODPMasterFooter() throws Exception {
-        try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                "/test-documents/testMasterFooter.odp")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new AutoDetectParser().parse(input, handler, metadata);
+    public void testOO2Metadata() throws Exception {
+        XMLResult r = getXML("testOpenOffice2.odf", new OpenDocumentParser());
+        Metadata metadata = r.metadata;
+        assertEquals(
+                "application/vnd.oasis.opendocument.formula",
+                metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(null, metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2006-01-27T11:55:22", metadata.get(Metadata.CREATION_DATE));
+        assertEquals("The quick brown fox jumps over the lazy dog",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(TikaCoreProperties.DESCRIPTION));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(OfficeOpenXMLCore.SUBJECT));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(Metadata.SUBJECT));
+        assertEquals("PT0S", metadata.get(Metadata.EDIT_TIME));
+        assertEquals("1", metadata.get("editing-cycles"));
+        assertEquals(
+                "OpenOffice.org/2.2$Win32 OpenOffice.org_project/680m14$Build-9134",
+                metadata.get("generator"));
+        assertEquals("Pangram, fox, dog", metadata.get(Metadata.KEYWORDS));
+
+        // User defined metadata
+        assertEquals("Text 1", metadata.get("custom:Info 1"));
+        assertEquals("2", metadata.get("custom:Info 2"));
+        assertEquals("false", metadata.get("custom:Info 3"));
+        assertEquals("true", metadata.get("custom:Info 4"));
+
+        // No statistics present
+        assertEquals(null, metadata.get(Metadata.PAGE_COUNT));
+        assertEquals(null, metadata.get(Metadata.PARAGRAPH_COUNT));
+        assertEquals(null, metadata.get(Metadata.WORD_COUNT));
+        assertEquals(null, metadata.get(Metadata.CHARACTER_COUNT));
+        assertEquals(null, metadata.get(Metadata.TABLE_COUNT));
+        assertEquals(null, metadata.get(Metadata.OBJECT_COUNT));
+        assertEquals(null, metadata.get(Metadata.IMAGE_COUNT));
+        assertEquals(null, metadata.get("nbTab"));
+        assertEquals(null, metadata.get("nbObject"));
+        assertEquals(null, metadata.get("nbImg"));
+        assertEquals(null, metadata.get("nbPage"));
+        assertEquals(null, metadata.get("nbPara"));
+        assertEquals(null, metadata.get("nbWord"));
+        assertEquals(null, metadata.get("nbCharacter"));
+
+        // Note - contents of maths files not currently supported
+        assertContains("<body />", r.xml);
 
-            String content = handler.toString();
-            assertContains("Master footer is here", content);
-        }
-    }  
+   }
 
+   /**
+    * Similar to {@link #testXMLParser()}, but using an OO3 file
+    */
     @Test
-    public void testODTFooter() throws Exception {
-        try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                "/test-documents/testFooter.odt")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new AutoDetectParser().parse(input, handler, metadata);
+   public void testOO3Metadata() throws Exception {
+        XMLResult r = getXML("testODFwithOOo3.odt", new OpenDocumentParser());
+        Metadata metadata = r.metadata;
+        assertEquals(
+                "application/vnd.oasis.opendocument.text",
+                metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("2009-10-05T21:22:38", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2009-10-05T19:04:01", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("2009-10-05T19:04:01", metadata.get(Metadata.CREATION_DATE));
+        assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test document", metadata.get(OfficeOpenXMLCore.SUBJECT));
+        assertEquals("Test document", metadata.get(Metadata.SUBJECT));
+        assertEquals("A rather complex document", metadata.get(TikaCoreProperties.DESCRIPTION));
+        assertEquals("Bart Hanssens", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Bart Hanssens", metadata.get("initial-creator"));
+        assertEquals("2", metadata.get("editing-cycles"));
+        assertEquals("PT02H03M24S", metadata.get(Metadata.EDIT_TIME));
+        assertEquals(
+                "OpenOffice.org/3.1$Unix OpenOffice.org_project/310m19$Build-9420",
+                metadata.get("generator"));
+        assertEquals("Apache, Lucene, Tika", metadata.get(Metadata.KEYWORDS));
+
+        // User defined metadata
+        assertEquals("Bart Hanssens", metadata.get("custom:Editor"));
+        assertEquals(null, metadata.get("custom:Info 2"));
+        assertEquals(null, metadata.get("custom:Info 3"));
+        assertEquals(null, metadata.get("custom:Info 4"));
+
+        // Check the document statistics
+        assertEquals("2", metadata.get(Office.PAGE_COUNT));
+        assertEquals("13", metadata.get(Office.PARAGRAPH_COUNT));
+        assertEquals("54", metadata.get(Office.WORD_COUNT));
+        assertEquals("351", metadata.get(Office.CHARACTER_COUNT));
+        assertEquals("0", metadata.get(Office.TABLE_COUNT));
+        assertEquals("2", metadata.get(Office.OBJECT_COUNT));
+        assertEquals("0", metadata.get(Office.IMAGE_COUNT));
+
+        // Check the Tika-1.0 style document statistics
+        assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
+        assertEquals("13", metadata.get(Metadata.PARAGRAPH_COUNT));
+        assertEquals("54", metadata.get(Metadata.WORD_COUNT));
+        assertEquals("351", metadata.get(Metadata.CHARACTER_COUNT));
+        assertEquals("0", metadata.get(Metadata.TABLE_COUNT));
+        assertEquals("2", metadata.get(Metadata.OBJECT_COUNT));
+        assertEquals("0", metadata.get(Metadata.IMAGE_COUNT));
+
+        // Check the old style statistics (these will be removed shortly)
+        assertEquals("0", metadata.get("nbTab"));
+        assertEquals("2", metadata.get("nbObject"));
+        assertEquals("0", metadata.get("nbImg"));
+        assertEquals("2", metadata.get("nbPage"));
+        assertEquals("13", metadata.get("nbPara"));
+        assertEquals("54", metadata.get("nbWord"));
+        assertEquals("351", metadata.get("nbCharacter"));
+
+        assertContains(
+                "Tika is part of the Lucene project.", r.xml);
 
-            String content = handler.toString();
-            assertContains("Here is some text...", content);
-            assertContains("Here is some text on page 2", content);
-            assertContains("Here is footer text", content);
-        }
-    }  
+
+   }
+
+    @Test
+    public void testODPMasterFooter() throws Exception {
+        assertContains("Master footer is here",
+                getXML("testMasterFooter.odp").xml);
+    }
+
+    @Test
+    public void testODTFooter() throws Exception {
+        XMLResult r = getXML("testFooter.odt");
+        assertContains("Here is some text...", r.xml);
+        assertContains("Here is some text on page 2", r.xml);
+        assertContains("Here is footer text", r.xml);
+    }
 
     @Test
     public void testODSFooter() throws Exception {
-        try (InputStream input = ODFParserTest.class.getResourceAsStream(
-                "/test-documents/testFooter.ods")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new AutoDetectParser().parse(input, handler, metadata);
+        assertContains("Here is a footer in the center area",
+                getXML("testFooter.ods").xml);
 
-            String content = handler.toString();
-            assertContains("Here is a footer in the center area", content);
-        }
     }  
     
     @Test
     public void testFromFile() throws Exception {
-        try (TikaInputStream tis = TikaInputStream.get(this.getClass().getResource(
-                "/test-documents/testODFwithOOo3.odt"))) {
-            assertEquals(true, tis.hasFile());
-            OpenDocumentParser parser = new OpenDocumentParser();
+        OpenDocumentParser parser = new OpenDocumentParser();
+        Path tmp = null;
+        try {
+            tmp = Files.createTempFile("test-odf-", ".odt");
+            Files.copy(getTestDocumentAsStream("testODFwithOOo3.odt"), tmp,
+                    StandardCopyOption.REPLACE_EXISTING);
             Metadata metadata = new Metadata();
+            TikaInputStream tis = TikaInputStream.get(tmp, metadata);
+            assertEquals(true, tis.hasFile());
             ContentHandler handler = new BodyContentHandler();
             parser.parse(tis, handler, metadata, new ParseContext());
 
@@ -323,25 +289,20 @@ public class ODFParserTest extends TikaTest {
 
             String content = handler.toString();
             assertContains("Tika is part of the Lucene project.", content);
+        } finally {
+            Files.delete(tmp);
         }
     }
-    
+
     @Test
     public void testNPEFromFile() throws Exception {
-        OpenDocumentParser parser = new OpenDocumentParser();
-        try (TikaInputStream tis = TikaInputStream.get(this.getClass().getResource(
-                "/test-documents/testNPEOpenDocument.odt"))) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            parser.parse(tis, handler, metadata, new ParseContext());
+        XMLResult r = getXML("testNPEOpenDocument.odt", new OpenDocumentParser());
+        assertEquals(
+                "application/vnd.oasis.opendocument.text",
+                r.metadata.get(Metadata.CONTENT_TYPE));
 
-            assertEquals(
-                    "application/vnd.oasis.opendocument.text",
-                    metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("primero hay que generar un par de claves", r.xml);
 
-            String content = handler.toString();
-            assertContains("primero hay que generar un par de claves", content);
-        }
     }
 
     // TIKA-1063: Test basic style support.
@@ -359,20 +320,17 @@ public class ODFParserTest extends TikaTest {
     //TIKA-1600: Test that null pointer doesn't break parsing.
     @Test
     public void testNullStylesInODTFooter() throws Exception {
-        Parser parser = new OpenDocumentParser();
-        try (InputStream input = ODFParserTest.class.getResourceAsStream("/test-documents/testODT-TIKA-6000.odt")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            parser.parse(input, handler, metadata, new ParseContext());
 
-            assertEquals("application/vnd.oasis.opendocument.text", metadata.get(Metadata.CONTENT_TYPE));
+        XMLResult r = getXML("testODT-TIKA-6000.odt", new OpenDocumentParser(), new Metadata(), new ParseContext());
 
-            String content = handler.toString();
+        assertEquals("application/vnd.oasis.opendocument.text", r.metadata.get(Metadata.CONTENT_TYPE));
+
+        String content = r.xml;
+
+        assertContains("Utilisation de ce document", content);
+        assertContains("Copyright and License", content);
+        assertContains("Changer la langue", content);
+        assertContains("La page d’accueil permet de faire une recherche simple", content);
 
-            assertContains("Utilisation de ce document", content);
-            assertContains("Copyright and License", content);
-            assertContains("Changer la langue", content);
-            assertContains("La page d’accueil permet de faire une recherche simple", content);
-        }
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index 365de77..dc75be5 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -17,15 +17,11 @@
 package org.apache.tika.parser.rtf;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.InputStream;
-import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@@ -49,7 +45,6 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.WriteOutContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
@@ -62,117 +57,98 @@ public class RTFParserTest extends TikaTest {
 
     @Test
     public void testBasicExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTF.rtf");
 
-        Metadata metadata = new Metadata();
-        StringWriter writer = new StringWriter();
-        tika.getParser().parse(
-                new FileInputStream(file),
-                new WriteOutContentHandler(writer),
-                metadata,
-                new ParseContext());
-        String content = writer.toString();
-
-        assertEquals("application/rtf", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals(1, metadata.getValues(Metadata.CONTENT_TYPE).length);
-        assertContains("Test", content);
-        assertContains("indexation Word", content);
+        XMLResult r = getXML("testRTF.rtf");
+        assertEquals("application/rtf", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(1, r.metadata.getValues(Metadata.CONTENT_TYPE).length);
+        assertContains("Test", r.xml);
+        assertContains("indexation Word", r.xml);
     }
 
     @Test
     public void testUmlautSpacesExtraction2() throws Exception {
-        String content = getText("testRTFUmlautSpaces2.rtf");
-        content = content.replaceAll("\\s+", "");
-        assertEquals("\u00DCbersicht", content);
+        assertContains("<p>\u00DCbersicht</p>",
+                getXML("testRTFUmlautSpaces2.rtf").xml);
     }
 
     @Test
     public void testUnicodeUCNControlWordCharacterDoublingExtraction() throws Exception {
-        String content = getText("testRTFUnicodeUCNControlWordCharacterDoubling.rtf");
+        XMLResult r = getXML("testRTFUnicodeUCNControlWordCharacterDoubling.rtf");
 
-        assertContains("\u5E74", content);
-        assertContains("\u5ff5", content);
-        assertContains("0 ", content);
-        assertContains("abc", content);
-        assertFalse("Doubled character \u5E74", content.contains("\u5E74\u5E74"));
+        assertContains("\u5E74", r.xml);
+        assertContains("\u5ff5", r.xml);
+        assertContains("0 ", r.xml);
+        assertContains("abc", r.xml);
+        assertNotContained("\u5E74\u5E74", r.xml);
     }
 
     @Test
     public void testHexEscapeInsideWord() throws Exception {
-        String content = getText("testRTFHexEscapeInsideWord.rtf");
-        assertContains("ESP\u00cdRITO", content);
+        XMLResult r = getXML("testRTFHexEscapeInsideWord.rtf");
+        assertContains("ESP\u00cdRITO", r.xml);
     }
 
     @Test
     public void testWindowsCodepage1250() throws Exception {
-        String content = getText("testRTFWindowsCodepage1250.rtf");
-        assertContains("za\u017c\u00f3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144", content);
-        assertContains("ZA\u017b\u00d3\u0141\u0106 G\u0118\u015aL\u0104 JA\u0179\u0143", content);
+        XMLResult r = getXML("testRTFWindowsCodepage1250.rtf");
+        assertContains("za\u017c\u00f3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144", r.xml);
+        assertContains("ZA\u017b\u00d3\u0141\u0106 G\u0118\u015aL\u0104 JA\u0179\u0143", r.xml);
     }
 
     @Test
     public void testTableCellSeparation() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTFTableCellSeparation.rtf");
-        String content = tika.parseToString(file);
-        content = content.replaceAll("\\s+", " ");
-        assertContains("a b c d \u00E4 \u00EB \u00F6 \u00FC", content);
+        String content = getXML("testRTFTableCellSeparation.rtf").xml;
+        content = content.replaceAll("(\\s|<\\/?p>)+", " ");
         assertContains("a b c d \u00E4 \u00EB \u00F6 \u00FC", content);
     }
 
     @Test
     public void testTableCellSeparation2() throws Exception {
-        String content = getText("testRTFTableCellSeparation2.rtf");
+        String content = getXML("testRTFTableCellSeparation2.rtf").xml.replaceAll("\\s+", " ");
         // TODO: why do we insert extra whitespace...?
-        content = content.replaceAll("\\s+", " ");
-        assertContains("Station Fax", content);
+        assertContains("Station</p> <p>Fax", content);
     }
 
     @Test
     public void testWordPadCzechCharactersExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTFWordPadCzechCharacters.rtf");
-        String s1 = tika.parseToString(file);
-        assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
-        assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
+        XMLResult r = getXML("testRTFWordPadCzechCharacters.rtf");
+        assertContains("\u010Cl\u00E1nek t\u00FDdne", r.xml);
+        assertContains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty", r.xml);
     }
 
     @Test
     public void testWord2010CzechCharactersExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTFWord2010CzechCharacters.rtf");
-        String s1 = tika.parseToString(file);
-        assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
-        assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
+        XMLResult r = getXML("testRTFWord2010CzechCharacters.rtf");
+        assertContains("\u010Cl\u00E1nek t\u00FDdne", r.xml);
+        assertContains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty", r.xml);
     }
 
     @Test
     public void testMS932Extraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTF-ms932.rtf");
-        String s1 = tika.parseToString(file);
-
+        XMLResult r = getXML("testRTF-ms932.rtf");
         // Hello in Japanese
-        assertTrue(s1.contains("\u3053\u3093\u306b\u3061\u306f"));
+        assertContains("\u3053\u3093\u306b\u3061\u306f", r.xml);
 
         // Verify title, since it was also encoded with MS932:
-        Result r = getResult("testRTF-ms932.rtf");
+        r = getXML("testRTF-ms932.rtf");
         assertEquals("\u30bf\u30a4\u30c8\u30eb", r.metadata.get(TikaCoreProperties.TITLE));
     }
 
     @Test
     public void testUmlautSpacesExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTFUmlautSpaces.rtf");
-        String s1 = tika.parseToString(file);
-        assertTrue(s1.contains("\u00DCbersicht"));
+        XMLResult r = getXML("testRTFUmlautSpaces.rtf");
+        assertContains("\u00DCbersicht", r.xml);
     }
 
     @Test
     public void testGothic() throws Exception {
-        String content = getText("testRTFUnicodeGothic.rtf");
-        assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+        XMLResult r = getXML("testRTFUnicodeGothic.rtf");
+        assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", r.xml);
     }
 
     @Test
     public void testJapaneseText() throws Exception {
-        Result r = getResult("testRTFJapanese.rtf");
-        String content = r.text;
+        XMLResult r = getXML("testRTFJapanese.rtf");
 
         // Verify title -- this title uses upr escape inside
         // title info field:
@@ -183,17 +159,17 @@ public class RTFParserTest extends TikaTest {
         assertEquals("StarWriter", r.metadata.get(TikaCoreProperties.COMMENTS));
 
         // Special version of (GHQ)
-        assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+        assertContains("\uff08\uff27\uff28\uff31\uff09", r.xml);
 
         // 6 other characters
-        assertContains("\u6771\u4eac\u90fd\u4e09\u9df9\u5e02", content);
+        assertContains("\u6771\u4eac\u90fd\u4e09\u9df9\u5e02", r.xml);
     }
 
     @Test
     public void testMaxLength() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTFJapanese.rtf");
         Metadata metadata = new Metadata();
-        InputStream stream = TikaInputStream.get(file, metadata);
+        InputStream stream = TikaInputStream.get(
+                getTestDocumentAsStream("testRTFJapanese.rtf"));
 
         // Test w/ default limit:
         Tika localTika = new Tika();
@@ -204,7 +180,7 @@ public class RTFParserTest extends TikaTest {
 
         // Test setting max length on the instance:
         localTika.setMaxStringLength(200);
-        stream = TikaInputStream.get(file, metadata);
+        stream = TikaInputStream.get(getTestDocumentAsStream("testRTFJapanese.rtf"));
         content = localTika.parseToString(stream, metadata);
 
         // parseToString closes for convenience:
@@ -212,7 +188,7 @@ public class RTFParserTest extends TikaTest {
         assertTrue(content.length() <= 200);
 
         // Test setting max length per-call:
-        stream = TikaInputStream.get(file, metadata);
+        stream = TikaInputStream.get(getTestDocumentAsStream("testRTFJapanese.rtf"));
         content = localTika.parseToString(stream, metadata, 100);
         // parseToString closes for convenience:
         //stream.close();
@@ -221,14 +197,14 @@ public class RTFParserTest extends TikaTest {
 
     @Test
     public void testTextWithCurlyBraces() throws Exception {
-        String content = getText("testRTFWithCurlyBraces.rtf");
-        assertContains("{ some text inside curly brackets }", content);
+        XMLResult r = getXML("testRTFWithCurlyBraces.rtf");
+        assertContains("{ some text inside curly brackets }", r.xml);
     }
 
     @Test
     public void testControls() throws Exception {
-        Result r = getResult("testRTFControls.rtf");
-        String content = r.text;
+        XMLResult r = getXML("testRTFControls.rtf");
+        String content = r.xml;
         assertContains("Thiswordhasanem\u2014dash", content);
         assertContains("Thiswordhasanen\u2013dash", content);
         assertContains("Thiswordhasanon\u2011breakinghyphen", content);
@@ -241,8 +217,8 @@ public class RTFParserTest extends TikaTest {
 
     @Test
     public void testInvalidUnicode() throws Exception {
-        Result r = getResult("testRTFInvalidUnicode.rtf");
-        String content = r.text;
+        XMLResult r = getXML("testRTFInvalidUnicode.rtf");
+        String content = r.xml;
         assertContains("Unpaired hi \ufffd here", content);
         assertContains("Unpaired lo \ufffd here", content);
         assertContains("Mismatched pair \ufffd\ufffd here", content);
@@ -250,8 +226,8 @@ public class RTFParserTest extends TikaTest {
 
     @Test
     public void testVarious() throws Exception {
-        Result r = getResult("testRTFVarious.rtf");
-        String content = r.text;
+        XMLResult r = getXML("testRTFVarious.rtf");
+        String content = r.xml;
         assertContains("Footnote appears here", content);
         assertContains("This is a footnote.", content);
         assertContains("This is the header text.", content);
@@ -267,10 +243,10 @@ public class RTFParserTest extends TikaTest {
         assertContains("(Kramer)", content);
 
         // Table
-        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", content.replaceAll("\\s+", " "));
+        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", content.replaceAll("(\\s|<\\/?p>)+", " "));
 
         // 2-columns
-        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+", " "));
+        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("(\\s|<\\/?p>)+", " "));
         assertContains("This is a hyperlink", content);
         assertContains("Here is a list:", content);
         for (int row = 1; row <= 3; row++) {
@@ -393,17 +369,13 @@ public class RTFParserTest extends TikaTest {
     // TIKA-1192
     @Test
     public void testListOverride() throws Exception {
-        Result r = getResult("testRTFListOverride.rtf");
-        String content = r.text;
-        assertContains("Body", content);
+        assertContains("Body", getXML("testRTFListOverride.rtf").xml);
     }
 
     // TIKA-1305
     @Test
     public void testCorruptListOverride() throws Exception {
-        Result r = getResult("testRTFCorruptListOverride.rtf");
-        String content = r.text;
-        assertContains("apple", content);
+        assertContains("apple", getXML("testRTFCorruptListOverride.rtf").xml);
     }
 
     // TIKA-1010
@@ -565,31 +537,4 @@ public class RTFParserTest extends TikaTest {
         assertEquals(2, tracker.filenames.size());
     }
 
-    private Result getResult(String filename) throws Exception {
-        File file = getResourceAsFile("/test-documents/" + filename);
-
-        Metadata metadata = new Metadata();
-        StringWriter writer = new StringWriter();
-        tika.getParser().parse(
-                new FileInputStream(file),
-                new WriteOutContentHandler(writer),
-                metadata,
-                new ParseContext());
-        String content = writer.toString();
-        return new Result(content, metadata);
-    }
-
-    private String getText(String filename) throws Exception {
-        return getResult(filename).text;
-    }
-
-    private static class Result {
-        public final String text;
-        public final Metadata metadata;
-
-        public Result(String text, Metadata metadata) {
-            this.text = text;
-            this.metadata = metadata;
-        }
-    }
 }


[03/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
index 0521032..2830b5a 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
@@ -69,7 +69,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/msword")
                 .accept("text/plain")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
+                .put(getTestDocumentAsStream(TEST_DOC));
         String responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
         assertTrue(responseMsg.contains("test"));
@@ -90,7 +90,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/vnd.ms-excel")
                 .accept("text/plain")
-                .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+                .put(getTestDocumentAsStream("password.xls"));
 
         assertEquals(UNPROCESSEABLE, response.getStatus());
     }
@@ -100,7 +100,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/msword")
                 .accept("text/html")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
+                .put(getTestDocumentAsStream(TEST_DOC));
         String responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
         assertTrue(responseMsg.contains("test"));
@@ -113,7 +113,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/vnd.ms-excel")
                 .accept("text/html")
-                .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+                .put(getTestDocumentAsStream("password.xls"));
 
         assertEquals(UNPROCESSEABLE, response.getStatus());
     }
@@ -123,7 +123,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/msword")
                 .accept("text/xml")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
+                .put(getTestDocumentAsStream(TEST_DOC));
         String responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
         assertTrue(responseMsg.contains("test"));
@@ -134,7 +134,7 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/vnd.ms-excel")
                 .accept("text/xml")
-                .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+                .put(getTestDocumentAsStream("password.xls"));
 
         assertEquals(UNPROCESSEABLE, response.getStatus());
     }
@@ -143,7 +143,8 @@ public class TikaResourceTest extends CXFTestBase {
     public void testSimpleWordMultipartXML() throws Exception {
         ClassLoader.getSystemResourceAsStream(TEST_DOC);
         Attachment attachmentPart =
-                new Attachment("myworddoc", "application/msword", ClassLoader.getSystemResourceAsStream(TEST_DOC));
+                new Attachment("myworddoc", "application/msword",
+                        getTestDocumentAsStream(TEST_DOC));
         WebClient webClient = WebClient.create(endPoint + TIKA_PATH + "/form");
         Response response = webClient.type("multipart/form-data")
                 .accept("text/xml")
@@ -161,7 +162,7 @@ public class TikaResourceTest extends CXFTestBase {
         //first try text
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .accept("text/plain")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         String responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
         assertTrue(responseMsg.contains("Course of human events"));
@@ -169,7 +170,7 @@ public class TikaResourceTest extends CXFTestBase {
         //now go for xml -- different call than text
         response = WebClient.create(endPoint + TIKA_PATH)
                 .accept("text/xml")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+                .put(getTestDocumentAsStream(TEST_RECURSIVE_DOC));
         responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
         assertTrue(responseMsg.contains("Course of human events"));
@@ -185,9 +186,9 @@ public class TikaResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + TIKA_PATH)
                 .type("application/rtf")
                 .accept("text/plain")
-                .put(ClassLoader.getSystemResourceAsStream("testRTF_npeFromWMFInTikaServer.rtf"));
+                .put(getTestDocumentAsStream("testRTF_npeFromWMFInTikaServer.rtf"));
         String responseMsg = getStringFromInputStream((InputStream) response
                 .getEntity());
-        assertTrue(responseMsg.contains("Example text"));
+        assertContains("Example text", responseMsg);
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
index b883c96..a61bf52 100644
--- a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
@@ -87,7 +87,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testDocWAV() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .type(APPLICATION_MSWORD).accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+                .put(getTestDocumentAsStream(TEST_DOC_WAV));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
         assertEquals(WAV1_MD5, data.get(WAV1_NAME));
@@ -99,7 +99,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testDocWAVText() throws Exception {
         Response response = WebClient.create(endPoint + ALL_PATH)
                 .type(APPLICATION_MSWORD).accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+                .put(getTestDocumentAsStream(TEST_DOC_WAV));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
         assertEquals(WAV1_MD5, data.get(WAV1_NAME));
@@ -111,7 +111,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testDocPicture() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .type(APPLICATION_MSWORD).accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+                .put(getTestDocumentAsStream(TEST_DOC_WAV));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
 
@@ -122,7 +122,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testDocPictureNoOle() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .type(APPLICATION_MSWORD).accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream("2pic.doc"));
+                .put(getTestDocumentAsStream("2pic.doc"));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
         assertEquals(JPG2_MD5, data.get(JPG2_NAME));
@@ -132,7 +132,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testImageDOCX() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .accept("application/zip").put(
-                        ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
+                        getTestDocumentAsStream(TEST_DOCX_IMAGE));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
         assertEquals(DOCX_IMAGE1_MD5, data.get(DOCX_IMAGE1_NAME));
@@ -144,7 +144,7 @@ public class UnpackerResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .type("xxx/xxx")
                 .accept("*/*")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+                .put(getTestDocumentAsStream(TEST_DOC_WAV));
 
         assertEquals(415, response.getStatus());
     }
@@ -154,7 +154,7 @@ public class UnpackerResourceTest extends CXFTestBase {
         String TEST_DOCX_EXE = "2exe.docx";
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
+                .put(getTestDocumentAsStream(TEST_DOCX_EXE));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
 
@@ -166,7 +166,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testImageXSL() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream("pic.xls"));
+                .put(getTestDocumentAsStream("pic.xls"));
 
         Map<String, String> data = readZipArchive((InputStream) response.getEntity());
         assertEquals(XSL_IMAGE1_MD5, data.get("0.jpg"));
@@ -177,7 +177,7 @@ public class UnpackerResourceTest extends CXFTestBase {
     public void testTarDocPicture() throws Exception {
         Response response = WebClient.create(endPoint + UNPACKER_PATH)
                 .type(APPLICATION_MSWORD).accept("application/x-tar")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+                .put(getTestDocumentAsStream(TEST_DOC_WAV));
 
         Map<String, String> data = readArchiveFromStream(new TarArchiveInputStream((InputStream) response.getEntity()));
 
@@ -189,7 +189,7 @@ public class UnpackerResourceTest extends CXFTestBase {
         Response response = WebClient.create(endPoint + ALL_PATH)
                 .header(CONTENT_TYPE, APPLICATION_XML)
                 .accept("application/zip")
-                .put(ClassLoader.getSystemResourceAsStream("test.doc"));
+                .put(getTestDocumentAsStream("test.doc"));
 
         String responseMsg = readArchiveText((InputStream) response.getEntity());
         assertNotNull(responseMsg);

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/2exe.docx
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/2exe.docx b/tika-server/src/test/resources/2exe.docx
deleted file mode 100644
index 64cfbe1..0000000
Binary files a/tika-server/src/test/resources/2exe.docx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/2pic.doc
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/2pic.doc b/tika-server/src/test/resources/2pic.doc
deleted file mode 100644
index 75c53b3..0000000
Binary files a/tika-server/src/test/resources/2pic.doc and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/2pic.docx
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/2pic.docx b/tika-server/src/test/resources/2pic.docx
deleted file mode 100644
index fe424e4..0000000
Binary files a/tika-server/src/test/resources/2pic.docx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/CDEC_WEATHER_2010_03_02
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/CDEC_WEATHER_2010_03_02 b/tika-server/src/test/resources/CDEC_WEATHER_2010_03_02
deleted file mode 100644
index c50e8e7..0000000
--- a/tika-server/src/test/resources/CDEC_WEATHER_2010_03_02
+++ /dev/null
@@ -1,98 +0,0 @@
-Station ID	Start Date	Date	Time	Temp	Cond	Depth	DO	Flow	WXT510P	Latitude	Longitude
-SMN	03/02/2010	03/01/2010	23:00	14.5	791.00	53.00	7.5	1460		37.347214	-120.976181
-SMN	03/02/2010	03/01/2010	23:15	14.5	790.00	52.99	7.5	1450		37.347214	-120.976181
-SMN	03/02/2010	03/01/2010	23:30	14.5	788.00	53.03	7.4	1480		37.347214	-120.976181
-SMN	03/02/2010	03/01/2010	23:45	14.5	790.00	53.03	7.4	1480		37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	00:00	14.5	785.00	53.02	7.4	1470	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	00:15	14.5	786.00	53.00	7.4	1460	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	00:30	14.5	790.00	53.04	7.4	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	00:45	14.5	792.00	53.02	7.3	1470	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	01:00	14.5	786.00	53.03	7.3	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	01:15	14.5	787.00	53.03	7.3	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	01:30	14.5	791.00	53.03	7.3	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	01:45	14.5	789.00	53.04	7.3	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	02:00	14.5	794.00	53.06	7.2	1490	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	02:15	14.4	801.00	53.06	7.2	1490	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	02:30	14.4	802.00	53.04	7.2	1480	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	02:45	14.4	803.00	53.07	7.2	1500	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	03:00	14.4	802.00	53.06	7.2	1490	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	03:15	14.4	803.00	53.08	7.2	1500	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	03:30	14.4	806.00	53.06	7.2	1490	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	03:45	14.4	807.00	53.08	7.1	1500	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	04:00	14.4	810.00	53.09	7.1	1510	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	04:15	14.4	810.00	53.10	7.1	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	04:30	14.3	808.00	53.11	7.1	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	04:45	14.3	810.00	53.11	7.1	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	05:00	14.3	813.00	53.11	7.0	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	05:15	14.3	811.00	53.11	7.0	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	05:30	14.3	810.00	53.10	7.0	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	05:45	14.3	805.00	53.12	7.0	1530	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	06:00	14.2	806.00	53.10	7.0	1520	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	06:15	14.2	805.00	53.12	7.0	1530	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	06:30	14.2	808.00	53.14	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	06:45	14.2	809.00	53.14	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	07:00	14.2	803.00	53.13	6.9	1530	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	07:15	14.2	807.00	53.13	6.9	1530	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	07:30	14.2	805.00	53.14	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	07:45	14.2	811.00	53.14	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	08:00	14.2	815.00	53.15	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	08:15	14.3	817.00	53.13	6.9	1530	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	08:30	14.3	817.00	53.15	6.9	1540	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	08:45	14.3	811.00	53.16	6.8	1550	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	09:00	14.3	810.00	53.17	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	09:15	14.3	809.00	53.16	6.9	1550	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	09:30	14.3	813.00	53.18	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	09:45	14.3	813.00	53.17	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	10:00	14.3	813.00	53.19	6.9	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	10:15	14.3	820.00	53.17	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	10:30	14.3	818.00	53.18	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	10:45	14.3	821.00	53.19	6.9	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	11:00	14.3	821.00	53.18	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	11:15	14.3	825.00	53.18	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	11:30	14.3	827.00	53.17	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	11:45	14.3	825.00	53.18	6.9	1560	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	12:00	14.3	829.00	53.19	6.9	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	12:15	14.4	831.00	53.20	6.9	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	12:30	14.4	837.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	12:45	14.4	835.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	13:00	14.5	837.00	53.21	7.0	1580	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	13:15	14.5	837.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	13:30	14.5	842.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	13:45	14.5	848.00	53.22	7.0	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	14:00	14.5	850.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	14:15	14.5	851.00	53.20	7.0	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	14:30	14.5	849.00	53.20	7.1	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	14:45	14.6	858.00	53.20	7.1	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	15:00	14.6	869.00	53.20	7.1	1570	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	15:15	14.6	868.00	53.22	7.1	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	15:30	14.5	868.00	53.23	7.1	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	15:45	14.5	869.00	53.22	7.1	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	16:00	14.5	873.00	53.22	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	16:15	14.5	877.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	16:30	14.5	884.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	16:45	14.5	887.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	17:00	14.5	889.00	53.22	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	17:15	14.5	891.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	17:30	14.4	893.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	17:45	14.4	896.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	18:00	14.4	896.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	18:15	14.4	895.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	18:30	14.4	899.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	18:45	14.3	901.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	19:00	14.3	899.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	19:15	14.3	911.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	19:30	14.3	914.00	53.26	7.2	1610	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	19:45	14.3	913.00	53.22	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	20:00	14.3	914.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	20:15	14.2	915.00	53.22	7.3	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	20:30	14.2	917.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	20:45	14.2	919.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	21:00	14.2	919.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	21:15	14.2	923.00	53.21	7.2	1580	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	21:30	14.2	920.00	53.24	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	21:45	14.2	927.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	22:00	14.2	929.00	53.23	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	22:15	14.1	927.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	22:30	14.1	931.00	53.22	7.2	1590	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	22:45	14.1	931.00	53.25	7.2	1600	760	37.347214	-120.976181
-SMN	03/02/2010	03/02/2010	23:00	14.1	937.00	53.23	7.2	1590	760	37.347214	-120.976181

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/Doc1_ole.doc
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/Doc1_ole.doc b/tika-server/src/test/resources/Doc1_ole.doc
deleted file mode 100644
index 953fe78..0000000
Binary files a/tika-server/src/test/resources/Doc1_ole.doc and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/english.txt
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/english.txt b/tika-server/src/test/resources/english.txt
deleted file mode 100644
index 5e3d20e..0000000
--- a/tika-server/src/test/resources/english.txt
+++ /dev/null
@@ -1 +0,0 @@
-This is English!

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/foo.csv
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/foo.csv b/tika-server/src/test/resources/foo.csv
deleted file mode 100644
index 0f48f3e..0000000
--- a/tika-server/src/test/resources/foo.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-foo,bar,baz
-123,"abc def",-987
-456,"qwertyuiop",98765
-789,"qawsedrft",3.14159

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/french.txt
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/french.txt b/tika-server/src/test/resources/french.txt
deleted file mode 100644
index 678e6c2..0000000
--- a/tika-server/src/test/resources/french.txt
+++ /dev/null
@@ -1 +0,0 @@
-c’est comme ci comme ça

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/mime/custom-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/mime/custom-mimetypes.xml b/tika-server/src/test/resources/mime/custom-mimetypes.xml
deleted file mode 100644
index 78cf392..0000000
--- a/tika-server/src/test/resources/mime/custom-mimetypes.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-
-<mime-info>
-    <mime-type type="application/evil">
-        <glob pattern="*.evil"/>
-        <sub-class-of type="text/plain"/>
-    </mime-type>
-</mime-info>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/mock/null_pointer.xml
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/mock/null_pointer.xml b/tika-server/src/test/resources/mock/null_pointer.xml
deleted file mode 100644
index 80043c0..0000000
--- a/tika-server/src/test/resources/mock/null_pointer.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <throw class="java.lang.NullPointerException">null pointer message</throw>
-</mock>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml b/tika-server/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
new file mode 100644
index 0000000..b3ddc83
--- /dev/null
+++ b/tika-server/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<mime-info>
+    <mime-type type="application/evil">
+        <glob pattern="*.evil"/>
+        <sub-class-of type="text/plain"/>
+    </mime-type>
+</mime-info>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/password.xls
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/password.xls b/tika-server/src/test/resources/password.xls
deleted file mode 100644
index a6ad86a..0000000
Binary files a/tika-server/src/test/resources/password.xls and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/pic.xls
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/pic.xls b/tika-server/src/test/resources/pic.xls
deleted file mode 100644
index 6798ae2..0000000
Binary files a/tika-server/src/test/resources/pic.xls and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/pic.xlsx
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/pic.xlsx b/tika-server/src/test/resources/pic.xlsx
deleted file mode 100644
index 9cc155a..0000000
Binary files a/tika-server/src/test/resources/pic.xlsx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/test.doc
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/test.doc b/tika-server/src/test/resources/test.doc
deleted file mode 100644
index 93198c8..0000000
Binary files a/tika-server/src/test/resources/test.doc and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf b/tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf
deleted file mode 100644
index a5870e5..0000000
--- a/tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf
+++ /dev/null
@@ -1,235 +0,0 @@
-{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff0\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang2057\deflangfe2057\themelang2057\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
-{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\f171\fbidi \froman\fcharset0\fprq2{\*\panose 02040602050305030304}Book Antiqua;}
-{\f318\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0603030504020204}Humnst777 BT{\*\falt Lucida Sans Unicode};}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
-{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}
-{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
-{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}
-{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f319\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f320\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
-{\f322\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f323\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f324\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f325\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
-{\f326\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f327\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f329\fbidi \fswiss\fcharset238\fprq2 Arial CE;}{\f330\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;}
-{\f332\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f333\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f334\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f335\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);}
-{\f336\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f337\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f659\fbidi \froman\fcharset238\fprq2 Cambria Math CE;}{\f660\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;}
-{\f662\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f663\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f666\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;}{\f667\fbidi \froman\fcharset163\fprq2 Cambria Math (Vietnamese);}
-{\f2029\fbidi \froman\fcharset238\fprq2 Book Antiqua CE;}{\f2030\fbidi \froman\fcharset204\fprq2 Book Antiqua Cyr;}{\f2032\fbidi \froman\fcharset161\fprq2 Book Antiqua Greek;}{\f2033\fbidi \froman\fcharset162\fprq2 Book Antiqua Tur;}
-{\f2036\fbidi \froman\fcharset186\fprq2 Book Antiqua Baltic;}{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
-{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
-{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
-{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
-{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
-{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;}
-{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;}
-{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\fhimajor\f31536\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
-{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
-{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
-{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
-{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
-{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
-{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
-{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
-{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}
-{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}
-{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
-{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
-{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
-{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;
-\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\*\defchp \fs22 }{\*\defpap 
-\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 
-\ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\s1\ql \li0\ri0\keepn\widctlpar\tx5670\tx8222\wrapdefault\faauto\outlinelevel0\rin0\lin0\itap0 \rtlch\fcs1 \ab\af1\afs24\alang1025 \ltrch\fcs0 
-\b\f1\fs20\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext0 \slink15 \sqformat heading 1;}{\s2\ql \li0\ri0\keepn\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel1\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
-\ab\af1\afs28\alang1025 \ltrch\fcs0 \b\f1\fs28\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \sbasedon0 \snext0 \slink16 \sqformat heading 2;}{\s4\ql \li5670\ri0\keepn\widctlpar\tx5670\tx7371\wrapdefault\faauto\outlinelevel3\rin0\lin5670\itap0 
-\rtlch\fcs1 \ab\af1\afs16\alang1025 \ltrch\fcs0 \b\f1\fs16\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \sbasedon0 \snext0 \slink17 \sqformat heading 4;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\*
-\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa200\sl276\slmult1
-\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe2057\cgrid\langnp2057\langfenp2057 \snext11 \ssemihidden \sunhideused \sqformat Normal Table;}{\*\cs15 \additive 
-\rtlch\fcs1 \ab\af31503\afs32 \ltrch\fcs0 \b\fs32\lang0\langfe1033\kerning32\loch\f31502\hich\af31502\dbch\af31501\langnp0\langfenp1033 \sbasedon10 \slink1 \slocked \spriority9 Heading 1 Char;}{\*\cs16 \additive \rtlch\fcs1 \ab\ai\af31503\afs28 
-\ltrch\fcs0 \b\i\fs28\lang0\langfe1033\loch\f31502\hich\af31502\dbch\af31501\langnp0\langfenp1033 \sbasedon10 \slink2 \slocked \ssemihidden \spriority9 Heading 2 Char;}{\*\cs17 \additive \rtlch\fcs1 \ab\af31507\afs28 \ltrch\fcs0 
-\b\fs28\lang0\langfe1033\loch\f31506\hich\af31506\dbch\af31505\langnp0\langfenp1033 \sbasedon10 \slink4 \slocked \ssemihidden \spriority9 Heading 4 Char;}{\s18\ql \li0\ri0\widctlpar
-\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext18 \slink19 header;}{\*\cs19 \additive \rtlch\fcs1 
-\af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 \sbasedon10 \slink18 \slocked \ssemihidden Header Char;}{\s20\ql \li0\ri0\widctlpar\tx3402\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
-\ab\af1\afs24\alang1025 \ltrch\fcs0 \b\f1\fs20\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext20 \slink21 Body Text;}{\*\cs21 \additive \rtlch\fcs1 \af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 
-\sbasedon10 \slink20 \slocked \ssemihidden Body Text Char;}{\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 
-\fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 \sbasedon0 \snext22 \slink23 \styrsid14506524 footer;}{\*\cs23 \additive \rtlch\fcs1 \af0\afs24 \ltrch\fcs0 \fs24\lang0\langfe1033\langnp0\langfenp1033 \sbasedon10 \slink22 \slocked \ssemihidden 
-Footer Char;}}{\*\rsidtbl \rsid69694\rsid615335\rsid817088\rsid1394934\rsid1968554\rsid2362503\rsid2504751\rsid2508965\rsid3497332\rsid3954968\rsid4262707\rsid4459777\rsid4947815\rsid5249973\rsid5375126\rsid5768946\rsid6625584\rsid6695929\rsid7547824
-\rsid7568219\rsid7681002\rsid7756842\rsid8788056\rsid9179382\rsid9185548\rsid9589441\rsid9716173\rsid10108489\rsid10158374\rsid10170376\rsid10447577\rsid10506307\rsid10508481\rsid11937854\rsid12735407\rsid14506524\rsid15223573\rsid15351889\rsid15429861
-\rsid15800823\rsid16209942\rsid16329808\rsid16338741\rsid16531520}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\title Cardiff}{\author A Other}
-{\operator Ian Williams}{\creatim\yr2016\mo2\dy1\hr16\min12}{\revtim\yr2016\mo2\dy1\hr16\min12}{\version2}{\edmins2}{\nofpages1}{\nofwords6}{\nofchars37}{\*\company Cardiff}{\nofcharsws42}{\vern32773}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/off
-ice/word/2003/wordml}}\paperw11906\paperh16838\margl851\margr851\margt567\margb794\gutter0\ltrsect 
-\widowctrl\ftnbj\aenddoc\trackmoves1\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen
-\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace120\dgvspace180\dghorigin851\dgvorigin567\dghshow2\dgvshow1
-\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nojkernpunct\rsidroot15429861 \fet0{\*\wgrffmtfilter 013f}\ilfomacatclnup0{\*\template 
-C:\\PMS\\DOCUMENT\\gplnew.dot}{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 
-{\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid5249973 \chftnsep 
-\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
-\ltrch\fcs0 \insrsid5249973 \chftnsepc 
-\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
-\ltrch\fcs0 \insrsid5249973 \chftnsep 
-\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 
-\ltrch\fcs0 \insrsid5249973 \chftnsepc 
-\par }}\ltrpar \sectd \ltrsect\linex0\headery709\footery709\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sectrsid3497332\sftnbj {\headerl \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar
-\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
-\par }}{\headerr \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
-\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
-\par }}{\footerl \ltrpar \pard\plain \ltrpar\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
-\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
-\par }}{\footerr \ltrpar \pard\plain \ltrpar\s22\qr \li-284\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin-284\itap0\pararsid14506524 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 
-\fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af0\afs16 \ltrch\fcs0 \f318\fs16\insrsid1968554 {\pict{\*\picprop\shplid1026{\sp{\sn shapeType}{\sv 1}}{\sp{\sn fFlipH}{\sv 0}}
-{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn fillColor}{\sv 3355443}}{\sp{\sn fRecolorFillAsPicture}{\sv 0}}{\sp{\sn fUseShapeAnchor}{\sv 0}}{\sp{\sn fFilled}{\sv 1}}{\sp{\sn fLine}{\sv 0}}{\sp{\sn pctHR}{\sv 0}}{\sp{\sn alignHR}{\sv 1}}{\sp{\sn dxHeightHR}{\sv 20}}
-{\sp{\sn dxWidthHR}{\sv 10943}}{\sp{\sn fLayoutInCell}{\sv 1}}{\sp{\sn fStandardHR}{\sv 1}}{\sp{\sn fNoshadeHR}{\sv 1}}{\sp{\sn fHorizRule}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 1}}}\picscalex1094\picscaley4\piccropl0\piccropr0\piccropt0\piccropb0
-\picw1764\pich882\picwgoal1000\pichgoal500\wmetafile8\bliptag667904020\blipupi71{\*\blipuid 27cf68149ca99ab95f958a7b62da888e}010009000003dd02000006001202000000001202000026060f001a04574d464301000000000001003e050000000001000000f803000000000000f80300000100
-00006c000000ffffffffffffffff111100002c00000000000000000000003e480000b900000020454d4600000100f80300001d00000003000000000000000000
-000000000000981200009f1a0000ca0000002101000000000000000000000000000023130300f6660400160000000c000000180000000a000000100000000000
-0000000000000900000010000000111100002c000000250000000c0000000e000080250000000c0000000e000080120000000c00000001000000520000007001
-000001000000a4ffffff000000000000000000000000900100000000000004400022430061006c00690062007200690000000000000000000000000000000000
-0000000000000000000000000000000000000000000000000000000000000000000000001900304e19001000000094511900144f1900fa4e5966945119008c4e
-190010000000fc4f190078511900cc4e5966945119008c4e1900200000008a790c5f8c4e19009451190020000000ffffffffdc008900057a0c5fffffffffffff
-0180ffff01809f020180ffffffff00420000000800000008000018f18d1001000000000000005802000025000000372e90010000020f0502020204030204ff02
-00e0ffac004001000000000000009f01000000000000430061006c006900620072000000000020ebf70486d759667a68466dbc008900306c8000c04e19009832
-055f1f00000001000000fc4e1900fc4e1900907b035f1f000000244f1900dc0089006476000800000000250000000c00000001000000250000000c0000000100
-0000250000000c00000001000000180000000c0000000000000254000000540000000000000000000000350000002b000000010000005fcc87405eb387400000
-000057000000010000004c000000040000000000000000000000111100002c00000050000000200000003600000046000000280000001c000000474449430200
-0000ffffffffffffffff111100002c000000000000002100000008000000620000000c0000000100000024000000240000000000803e00000000000000000000
-803e000000000000000002000000270000001800000002000000000000003333330000000000250000000c00000002000000250000000c000000080000805600
-000030000000ffffffffffffffff111100002c00000005000000fefffefffeffad004144ad004144fefffefffeff250000000c00000007000080250000000c00
-000000000080240000002400000000008040000000000000000000008040000000000000000002000000220000000c000000ffffffff46000000140000000800
-00004744494303000000250000000c0000000e000080250000000c0000000e0000800e0000001400000000000000100000001400000004000000030108000500
-00000b0200000000050000000c0205000c02040000002e0118001c000000fb020200010000000000bc02000000000102022253797374656d003f00003f3f0000
-0000000000000000000001003f3f3f3f3f00040000002d010000040000002d01000004000000020101001c000000fb02f5ff0000000000009001000000000440
-002243616c6962726900000000000000000000000000000000000000000000000000040000002d010100040000002d010100040000002d010100050000000902
-000000020d000000320a0a00000001000400000000000e02050020000600030000001e0007000000fc020000333333000000040000002d01020008000000fa02
-050000000000ffffff00040000002d0103000e0000002403050000000000000005000e0205000e0200000000000008000000fa0200000000000000000000040000002d01040007000000fc020000ffffff000000040000002d010500040000002701ffff040000002d010000040000002d010000030000000000}}{
-\rtlch\fcs1 \af0 \ltrch\fcs0 \f318\ul\insrsid5249973 
-\par }\pard \ltrpar\s22\ql \li-284\ri0\widctlpar\tx3075\tx3119\tx7655\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin-284\itap0\pararsid16338741 {\rtlch\fcs1 \af0\afs14 \ltrch\fcs0 \b\f318\fs14\insrsid2504751 Example footer}{\rtlch\fcs1 \af0\afs14 
-\ltrch\fcs0 \f318\fs14\insrsid5249973 
-\par }}{\headerf \ltrpar \pard\plain \ltrpar\s18\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs22\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
-\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
-\par }}{\footerf \ltrpar \pard\plain \ltrpar\s22\ql \li0\ri0\widctlpar\tqc\tx4153\tqr\tx8306\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {
-\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid2504751 
-\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}
-{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8
-\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\ltrrow\trowd \irow0\irowband0\ltrrow
-\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
-\cltxlrtb\clftsWidth3\clwWidth5508\clshdrawnil \cellx5400\clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl \cltxlrtb\clftsWidth3\clwWidth4912\clshdrawnil \cellx10312\pard\plain \ltrpar\ql \li0\ri0\widctlpar\intbl
-\tx5670\tx8222\wrapdefault\faauto\rin0\lin0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 \fs24\lang2057\langfe1033\cgrid\langnp2057\langfenp1033 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\cf1\lang2057\langfe2057\langfenp2057\insrsid6695929 \cell 
-}\pard \ltrpar\qc \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid16338741 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\lang2057\langfe2057\langfenp2057\insrsid6695929 \cell }\pard \ltrpar
-\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1 \ltrch\fcs0 \f1\fs20\insrsid6695929 \trowd \irow0\irowband0\ltrrow
-\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
-\cltxlrtb\clftsWidth3\clwWidth5508\clshdrawnil \cellx5400\clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl \cltxlrtb\clftsWidth3\clwWidth4912\clshdrawnil \cellx10312\row \ltrrow}\trowd \irow1\irowband1\lastrow \ltrrow
-\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
-\cltxlrtb\clftsWidth3\clwWidth10420\clshdrawnil \cellx10312\pard \ltrpar\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs8\lang2057\langfe2057\langfenp2057\insrsid6695929 
-\cell }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 {\rtlch\fcs1 \af1 \ltrch\fcs0 \f1\fs20\insrsid6695929 \trowd \irow1\irowband1\lastrow \ltrrow
-\ts11\trgaph108\trleft-108\trkeep\trftsWidth1\trftsWidthB3\trftsWidthA3\trpaddl108\trpaddr108\trpaddfl3\trpaddfr3\tblind0\tblindtype3 \clvertalc\clbrdrt\brdrtbl \clbrdrl\brdrtbl \clbrdrb\brdrtbl \clbrdrr\brdrtbl 
-\cltxlrtb\clftsWidth3\clwWidth10420\clshdrawnil \cellx10312\row }\pard \ltrpar\qj \li0\ri0\widctlpar\tx0\wrapdefault\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0\pararsid15429861 {\rtlch\fcs1 \af0 \ltrch\fcs0 \f171\fs20\insrsid2504751 
-Example text to extract from RTF.}{\rtlch\fcs1 \af0 \ltrch\fcs0 \f171\fs20\insrsid15429861 
-\par 
-\par 
-\par }{\rtlch\fcs1 \af0 \ltrch\fcs0 \b\f171\fs20\ul\insrsid15429861 
-\par }{\rtlch\fcs1 \af0 \ltrch\fcs0 \b\f171\fs20\ul\insrsid15429861\charrsid4947815 
-\par }{\*\themedata 504b030414000600080000002100828abc13fa0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb6ac3301045f785fe83d0b6d8
-72ba28a5d8cea249777d2cd20f18e4b12d6a8f843409c9df77ecb850ba082d74231062ce997b55ae8fe3a00e1893f354e9555e6885647de3a8abf4fbee29bbd7
-2a3150038327acf409935ed7d757e5ee14302999a654e99e393c18936c8f23a4dc072479697d1c81e51a3b13c07e4087e6b628ee8cf5c4489cf1c4d075f92a0b
-44d7a07a83c82f308ac7b0a0f0fbf90c2480980b58abc733615aa2d210c2e02cb04430076a7ee833dfb6ce62e3ed7e14693e8317d8cd0433bf5c60f53fea2fe7
-065bd80facb647e9e25c7fc421fd2ddb526b2e9373fed4bb902e182e97b7b461e6bfad3f010000ffff0300504b030414000600080000002100a5d6a7e7c00000
-00360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4fc7060abb08
-84a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b63095120f88d94fbc
-52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462a1a82fe353
-bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f7468656d652f7468
-656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b4b0d592c9c
-070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b4757e8d3f7
-29e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f7468656d65
-312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87615b8116d8
-a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad79482a9c04
-98f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b5d8a314d3c
-94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab999fb7b471
-7509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9699640f671
-9e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd5868b37a088d1
-e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d60cf03ac1a5
-193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f9e7ef3f2d1
-17d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be15c308d3f2
-8acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a99793849c26ae6
-6252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d32a423279a
-668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2af074481847
-bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86e877f0034e
-16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb44f95d843b
-5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a6409fb44d0
-8741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c3d9058edf2
-c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db02565e85f3b966
-0d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276b9f7dec44b
-7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8c33585b5fb
-9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e51440ca2e0
-088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95b21be5ceaf
-8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff6dce591a26
-ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec69ffb9e65d0
-28d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239b75a5bb1e6
-345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a44959d366ad93
-b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e82db8df9f30
-254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f74
-68656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f24
-51eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e3198
-720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528
-a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100828abc13fa0000001c0200001300000000000000000000000000
-000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b000000000000000000000000
-002b0100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c00000000000000000000000000140200007468
-656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b000016000000000000000000
-00000000d10200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b010000270000000000
-00000000000000009b0900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000960a00000000}
-{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d
-617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169
-6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363
-656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e}
-{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;
-\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdpriority39 \lsdlocked0 toc 1;\lsdpriority39 \lsdlocked0 toc 2;\lsdpriority39 \lsdlocked0 toc 3;\lsdpriority39 \lsdlocked0 toc 4;
-\lsdpriority39 \lsdlocked0 toc 5;\lsdpriority39 \lsdlocked0 toc 6;\lsdpriority39 \lsdlocked0 toc 7;\lsdpriority39 \lsdlocked0 toc 8;\lsdpriority39 \lsdlocked0 toc 9;\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdpriority1 \lsdlocked0 Default Paragraph Font;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid;
-\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 1;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;
-\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;
-\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography;
-\lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 0105000002000000180000004d73786d6c322e534158584d4c5265616465722e352e3000000000000000000000060000
-d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff090006000000000000000000000001000000010000000000000000100000feffffff00000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-fffffffffffffffffdfffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffffffffffffec69d9888b8b3d4c859eaf6cd158be0f0000000000000000000000004077
-60480b5dd101feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000
-00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000
-000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000
-0000000000000000000000000000000000000000000000000105000000000000}}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-server/src/test/resources/test_recursive_embedded.docx
----------------------------------------------------------------------
diff --git a/tika-server/src/test/resources/test_recursive_embedded.docx b/tika-server/src/test/resources/test_recursive_embedded.docx
deleted file mode 100644
index cd562cb..0000000
Binary files a/tika-server/src/test/resources/test_recursive_embedded.docx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/pom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/pom.xml b/tika-test-resources/pom.xml
index a39fc62..30d41e0 100644
--- a/tika-test-resources/pom.xml
+++ b/tika-test-resources/pom.xml
@@ -16,11 +16,6 @@
   
   <dependencies>
     <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
     </dependency>
@@ -83,6 +78,4 @@
       </plugin>
     </plugins>
   </build>
-  
-  
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt b/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
deleted file mode 100644
index e6fa39e..0000000
--- a/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-WEEK_DAY=(?i)((sun)|(mon)|(tues)|(thurs)|(fri)|((sat)(ur)?))(day)?
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/tika-config.xml b/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
deleted file mode 100644
index 267c399..0000000
--- a/tika-test-resources/src/test/resources/org/apache/tika/parser/ner/tika-config.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<properties>
-    <parsers>
-        <parser class="org.apache.tika.parser.ner.NamedEntityParser">
-            <mime>text/plain</mime>
-            <mime>text/html</mime>
-            <mime>application/xhtml+xml</mime>
-        </parser>
-    </parsers>
-
-</properties>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/2exe.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/2exe.docx b/tika-test-resources/src/test/resources/test-documents/2exe.docx
new file mode 100644
index 0000000..64cfbe1
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/2exe.docx differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/2pic.doc
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/2pic.doc b/tika-test-resources/src/test/resources/test-documents/2pic.doc
new file mode 100644
index 0000000..75c53b3
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/2pic.doc differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/2pic.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/2pic.docx b/tika-test-resources/src/test/resources/test-documents/2pic.docx
new file mode 100644
index 0000000..fe424e4
Binary files /dev/null and b/tika-test-resources/src/test/resources/test-documents/2pic.docx differ

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-test-resources/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02 b/tika-test-resources/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02
new file mode 100644
index 0000000..c50e8e7
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02
@@ -0,0 +1,98 @@
+Station ID	Start Date	Date	Time	Temp	Cond	Depth	DO	Flow	WXT510P	Latitude	Longitude
+SMN	03/02/2010	03/01/2010	23:00	14.5	791.00	53.00	7.5	1460		37.347214	-120.976181
+SMN	03/02/2010	03/01/2010	23:15	14.5	790.00	52.99	7.5	1450		37.347214	-120.976181
+SMN	03/02/2010	03/01/2010	23:30	14.5	788.00	53.03	7.4	1480		37.347214	-120.976181
+SMN	03/02/2010	03/01/2010	23:45	14.5	790.00	53.03	7.4	1480		37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	00:00	14.5	785.00	53.02	7.4	1470	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	00:15	14.5	786.00	53.00	7.4	1460	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	00:30	14.5	790.00	53.04	7.4	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	00:45	14.5	792.00	53.02	7.3	1470	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	01:00	14.5	786.00	53.03	7.3	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	01:15	14.5	787.00	53.03	7.3	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	01:30	14.5	791.00	53.03	7.3	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	01:45	14.5	789.00	53.04	7.3	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	02:00	14.5	794.00	53.06	7.2	1490	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	02:15	14.4	801.00	53.06	7.2	1490	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	02:30	14.4	802.00	53.04	7.2	1480	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	02:45	14.4	803.00	53.07	7.2	1500	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	03:00	14.4	802.00	53.06	7.2	1490	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	03:15	14.4	803.00	53.08	7.2	1500	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	03:30	14.4	806.00	53.06	7.2	1490	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	03:45	14.4	807.00	53.08	7.1	1500	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	04:00	14.4	810.00	53.09	7.1	1510	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	04:15	14.4	810.00	53.10	7.1	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	04:30	14.3	808.00	53.11	7.1	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	04:45	14.3	810.00	53.11	7.1	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	05:00	14.3	813.00	53.11	7.0	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	05:15	14.3	811.00	53.11	7.0	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	05:30	14.3	810.00	53.10	7.0	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	05:45	14.3	805.00	53.12	7.0	1530	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	06:00	14.2	806.00	53.10	7.0	1520	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	06:15	14.2	805.00	53.12	7.0	1530	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	06:30	14.2	808.00	53.14	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	06:45	14.2	809.00	53.14	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	07:00	14.2	803.00	53.13	6.9	1530	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	07:15	14.2	807.00	53.13	6.9	1530	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	07:30	14.2	805.00	53.14	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	07:45	14.2	811.00	53.14	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	08:00	14.2	815.00	53.15	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	08:15	14.3	817.00	53.13	6.9	1530	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	08:30	14.3	817.00	53.15	6.9	1540	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	08:45	14.3	811.00	53.16	6.8	1550	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	09:00	14.3	810.00	53.17	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	09:15	14.3	809.00	53.16	6.9	1550	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	09:30	14.3	813.00	53.18	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	09:45	14.3	813.00	53.17	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	10:00	14.3	813.00	53.19	6.9	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	10:15	14.3	820.00	53.17	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	10:30	14.3	818.00	53.18	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	10:45	14.3	821.00	53.19	6.9	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	11:00	14.3	821.00	53.18	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	11:15	14.3	825.00	53.18	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	11:30	14.3	827.00	53.17	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	11:45	14.3	825.00	53.18	6.9	1560	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	12:00	14.3	829.00	53.19	6.9	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	12:15	14.4	831.00	53.20	6.9	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	12:30	14.4	837.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	12:45	14.4	835.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	13:00	14.5	837.00	53.21	7.0	1580	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	13:15	14.5	837.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	13:30	14.5	842.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	13:45	14.5	848.00	53.22	7.0	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	14:00	14.5	850.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	14:15	14.5	851.00	53.20	7.0	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	14:30	14.5	849.00	53.20	7.1	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	14:45	14.6	858.00	53.20	7.1	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	15:00	14.6	869.00	53.20	7.1	1570	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	15:15	14.6	868.00	53.22	7.1	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	15:30	14.5	868.00	53.23	7.1	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	15:45	14.5	869.00	53.22	7.1	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	16:00	14.5	873.00	53.22	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	16:15	14.5	877.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	16:30	14.5	884.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	16:45	14.5	887.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	17:00	14.5	889.00	53.22	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	17:15	14.5	891.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	17:30	14.4	893.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	17:45	14.4	896.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	18:00	14.4	896.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	18:15	14.4	895.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	18:30	14.4	899.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	18:45	14.3	901.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	19:00	14.3	899.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	19:15	14.3	911.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	19:30	14.3	914.00	53.26	7.2	1610	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	19:45	14.3	913.00	53.22	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	20:00	14.3	914.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	20:15	14.2	915.00	53.22	7.3	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	20:30	14.2	917.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	20:45	14.2	919.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	21:00	14.2	919.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	21:15	14.2	923.00	53.21	7.2	1580	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	21:30	14.2	920.00	53.24	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	21:45	14.2	927.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	22:00	14.2	929.00	53.23	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	22:15	14.1	927.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	22:30	14.1	931.00	53.22	7.2	1590	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	22:45	14.1	931.00	53.25	7.2	1600	760	37.347214	-120.976181
+SMN	03/02/2010	03/02/2010	23:00	14.1	937.00	53.23	7.2	1590	760	37.347214	-120.976181