You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/19 21:25:01 UTC

[22/52] [partial] tika git commit: move test files to parser-modules

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif b/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
deleted file mode 100644
index 64974af..0000000
--- a/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
+++ /dev/null
@@ -1,84 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
-          <Entry_ID>005f3222-7548-11e2-851e-00c0f03d5b7c</Entry_ID>
-          <Entry_Title>Carbon Isotopic Values of Alkanes Extracted from Paleosols</Entry_Title>
-
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>PALEOCLIMATE</Topic>
-            <Term>LAND RECORDS</Term>
-            <Variable_Level_1>PALEOSOLS</Variable_Level_1>
-          </Parameters>
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>LAND SURFACE</Topic>
-            <Term>SOILS</Term>
-            <Variable_Level_1>CARBON</Variable_Level_1>
-          </Parameters>
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>PALEOCLIMATE</Topic>
-            <Term>LAND RECORDS</Term>
-            <Variable_Level_1>ISOTOPES</Variable_Level_1>
-          </Parameters>
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>BIOSPHERE</Topic>
-            <Term>ECOLOGICAL DYNAMICS</Term>
-            <Variable_Level_1>ECOSYSTEM FUNCTIONS</Variable_Level_1>
-            <Variable_Level_2>BIOGEOCHEMICAL CYCLES</Variable_Level_2>
-          </Parameters>
-          <Parameters>
-            <Category>EARTH SCIENCE</Category>
-            <Topic>SOLID EARTH</Topic>
-            <Term>GEOCHEMISTRY</Term>
-            <Variable_Level_1>BIOGEOCHEMICAL PROCESSES</Variable_Level_1>
-          </Parameters>
-
-
-          <Spatial_Coverage>
-            <Southernmost_Latitude>66.56</Southernmost_Latitude>
-            <Northernmost_Latitude>90</Northernmost_Latitude>
-            <Westernmost_Longitude>-180</Westernmost_Longitude>
-            <Easternmost_Longitude>180</Easternmost_Longitude>
-          </Spatial_Coverage>
-
-          <Data_Center>
-            <Data_Center_Name>
-              <Short_Name>ACADIS</Short_Name>
-              <Long_Name>Advanced Cooperative Arctic Data and Information Service</Long_Name>
-            </Data_Center_Name>
-            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
-            <Personnel>
-              <Role>DATA CENTER CONTACT</Role>
-              <First_Name>ACADIS</First_Name>
-              <Last_Name>User Services</Last_Name>
-              <Contact_Address>
-                <Address>NCAR/CISL</Address>
-                <Address>P.O. Box 3000</Address>
-                <City>Boulder</City>
-                <Province_or_State>CO</Province_or_State>
-                <Postal_Code>80307</Postal_Code>
-                <Country>USA</Country>
-              </Contact_Address>
-            </Personnel>
-          </Data_Center>
-
-          <Summary>
-            <Abstract>Dataset consists of compound specific carbon isotopic values of alkanes
-extracted from paleosols. Values represent the mean of duplicate
-measurements.</Abstract>
-          </Summary>
-
-          <Related_URL>
-            <URL_Content_Type>
-              <Type>GET DATA</Type>
-            </URL_Content_Type>
-            <URL>http://www.aoncadis.org/dataset/id/005f3222-7548-11e2-851e-00c0f03d5b7c.html</URL>
-            <Description>Data Center top-level access page for this resource</Description>
-          </Related_URL>
-
-          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
-          <Metadata_Version>9.8.4</Metadata_Version>
-          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
-        </DIF>

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM
deleted file mode 100644
index 9498f3f..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM
deleted file mode 100644
index 64be97d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM
deleted file mode 100644
index 171943d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/admin.chm
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/admin.chm b/tika-test-resources/src/test/resources/test-documents/chm/admin.chm
deleted file mode 100644
index 6dadaae..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/admin.chm and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM b/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM
deleted file mode 100644
index 9dd7b82..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM b/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM
deleted file mode 100644
index aebeddf..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM b/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM
deleted file mode 100644
index a023143..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM b/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM
deleted file mode 100644
index 18c3cf8..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM b/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM
deleted file mode 100644
index 8856ebd..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/complex.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/complex.mbox b/tika-test-resources/src/test/resources/test-documents/complex.mbox
deleted file mode 100644
index 2aa4828..0000000
--- a/tika-test-resources/src/test/resources/test-documents/complex.mbox
+++ /dev/null
@@ -1,291 +0,0 @@
-From core-user-return-14700-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 04:28:28 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 19921 invoked from network); 1 Jun 2009 04:28:28 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
-  by minotaur.apache.org with SMTP; 1 Jun 2009 04:28:28 -0000
-Received: (qmail 84995 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 84895 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 84885 invoked by uid 99); 1 Jun 2009 04:28:38 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
-    by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:38 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
-	tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [69.147.107.21] (HELO mrout2-b.corp.re1.yahoo.com) (69.147.107.21)
-    by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:26 +0000
-Received: from SNV-EXPF01.ds.corp.yahoo.com (snv-expf01.ds.corp.yahoo.com [207.126.227.250])
-	by mrout2-b.corp.re1.yahoo.com (8.13.8/8.13.8/y.out) with ESMTP id n514QYA6099963
-	for <co...@hadoop.apache.org>; Sun, 31 May 2009 21:26:35 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=yahoo-inc.com; c=nofws; q=dns;
-	h=received:user-agent:date:subject:from:to:message-id:
-	thread-topic:thread-index:in-reply-to:mime-version:content-type:
-	content-transfer-encoding:x-originalarrivaltime;
-	b=YVtSNdgjeeSBS1yY3XDolul49i+HrgNG7QszMo9LzGnrwejjgsl5+iUM6EiQgEpV
-Received: from SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.9]) by SNV-EXPF01.ds.corp.yahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
-	 Sun, 31 May 2009 21:26:34 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon,  1 Jun 2009 04:26:33 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 09:56:31 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: <co...@hadoop.apache.org>
-Message-ID: <C6...@yahoo-inc.com>
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnicSNoBw19cMU8UEaXwAdZ1YYhuw==
-In-Reply-To: <44...@web111005.mail.gq1.yahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
-	charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 04:26:34.0501 (UTC) FILETIME=[257EAB50:01C9E271]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Jothi
-
-
-On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Hi, 
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
-> 
-> Thanks,
-> Jianmin
-> 
-> 
-> 
->       
-
-
-From core-user-return-14701-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 05:31:14 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 38243 invoked from network); 1 Jun 2009 05:31:14 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
-  by minotaur.apache.org with SMTP; 1 Jun 2009 05:31:14 -0000
-Received: (qmail 15621 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 15557 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 15547 invoked by uid 99); 1 Jun 2009 05:31:24 -0000
-Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
-    by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 05:31:24 +0000
-X-ASF-Spam-Status: No, hits=2.2 required=10.0
-	tests=HTML_MESSAGE,SPF_PASS
-X-Spam-Check-By: apache.org
-Received-SPF: pass (nike.apache.org: local policy)
-Received: from [68.142.237.94] (HELO n9.bullet.re3.yahoo.com) (68.142.237.94)
-    by apache.org (qpsmtpd/0.29) with SMTP; Mon, 01 Jun 2009 05:31:11 +0000
-Received: from [68.142.237.88] by n9.bullet.re3.yahoo.com with NNFMP; 01 Jun 2009 05:30:50 -0000
-Received: from [67.195.9.82] by t4.bullet.re3.yahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [67.195.9.99] by t2.bullet.mail.gq1.yahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [127.0.0.1] by omp103.mail.gq1.yahoo.com with NNFMP; 01 Jun 2009 05:28:01 -0000
-X-Yahoo-Newman-Property: ymail-3
-X-Yahoo-Newman-Id: 796121.97519.bm@omp103.mail.gq1.yahoo.com
-Received: (qmail 35264 invoked by uid 60001); 1 Jun 2009 05:30:49 -0000
-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yahoo.com; s=s1024; t=1243834249; bh=R8qzdi/IbLyO8UwpnaujDpT9E+6bJ7nkmZN2803EmRk=; h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; b=vq4c6RIDbkuLPYd8mirusIXf6DqTb/IeT55In7W00Y5Sxx1ZiXBb78yE9+TDfXJ0elsEZvqv4ocyvolGE0eGtyYeJA0mZikpRNu6pidxPNpCplOcLHBRz7YQ7iERwv3TagRlWy2Xd3oD9ZeV0A05P7WUOiNNX1PUUJD1IVdrEZo=
-DomainKey-Signature:a=rsa-sha1; q=dns; c=nofws;
-  s=s1024; d=yahoo.com;
-  h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type;
-  b=6HXZV98ON5vBwmE/xS8stVD0D2F4dkMY7a0suX5KVTb736JdR8G59mqBq/dWcpbFTLiCLtxi18LMb/dU1RKRGOEdn3l3j/jKXhBrhIgfg3qtNskPedXDKBvn7JGXiSkqpA/tUtPjvc0Uuk8/LaA01SQTz40Engg7nD8/EJdIAhA=;
-Message-ID: <59...@web111010.mail.gq1.yahoo.com>
-X-YMail-OSG: KzhhrJYVM1m.MCS6vRpRP2ZZO2PrfnbngosELDCIa91ZqvhJph4RdmzfUW0jw9W04RCSch1K730bPohwNpNBIk2QR_zt4_mfbhfq7YEPkSoz9LSXG90P9vIo5Fc8qyZN0U6vA9gtdyGQTpN5ahvillUH9nAF0TMWv2SvZJLjPlQ0Z0p8oK8ltBwGTgLrM8Jtdn9D29yoRyi3_EpVOfdD9OP.EK50Vr1XwSUYMbnpZ0WGHMwd.Yig7A6Elwadm3YVbfOdx2mfrG.jQsUAxQjRBNvbrOM57.FaE11kHTe9aoBWSeihNg--
-Received: from [216.145.54.7] by web111010.mail.gq1.yahoo.com via HTTP; Sun, 31 May 2009 22:30:49 PDT
-X-Mailer: YahooMailRC/1277.43 YahooMailWebService/0.7.289.10
-References: <C6...@yahoo-inc.com>
-Date: Sun, 31 May 2009 22:30:49 -0700 (PDT)
-From: Jianmin Woo <ji...@yahoo.com>
-Subject: Re: question about when shuffle/sort start working
-To: core-user@hadoop.apache.org
-In-Reply-To: <C6...@yahoo-inc.com>
-MIME-Version: 1.0
-Content-Type: multipart/alternative; boundary="0-1193839393-1243834249=:35091"
-X-Virus-Checked: Checked by ClamAV on apache.org
-
---0-1193839393-1243834249=:35091
-Content-Type: text/plain; charset=us-ascii
-
-Thanks a lot for your explanation, Jothi. 
-
-So is this event generated by hadoop framework? Is there any API in mapper to fire this event? Actually, I am thinking to implement a mapper that will emit some <key, value> pairs, then fire this event to let the reducer works, the same mapper task then emit some other <key, value> pairs and repeat. Do you think is this logic feasible by current API?
-
-Thanks,
-Jianmin
-
-
-
-
-
-________________________________
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: core-user@hadoop.apache.org
-Sent: Monday, June 1, 2009 12:26:31 PM
-Subject: Re: question about when shuffle/sort start working
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Jothi
-
-
-On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Hi, 
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
-> 
-> Thanks,
-> Jianmin
-> 
-> 
-> 
->      
-
-
-      
---0-1193839393-1243834249=:35091--
-
-
-From core-user-return-14702-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 06:04:30 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 53387 invoked from network); 1 Jun 2009 06:04:29 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
-  by minotaur.apache.org with SMTP; 1 Jun 2009 06:04:29 -0000
-Received: (qmail 39066 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 38970 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 38955 invoked by uid 99); 1 Jun 2009 06:04:39 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
-    by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:39 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
-	tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [216.145.54.172] (HELO mrout2.yahoo.com) (216.145.54.172)
-    by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:28 +0000
-Received: from SNV-EXBH01.ds.corp.yahoo.com (snv-exbh01.ds.corp.yahoo.com [207.126.227.249])
-	by mrout2.yahoo.com (8.13.6/8.13.6/y.out) with ESMTP id n5163FGq038852
-	for <co...@hadoop.apache.org>; Sun, 31 May 2009 23:03:15 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=yahoo-inc.com; c=nofws; q=dns;
-	h=received:user-agent:date:subject:from:to:message-id:
-	thread-topic:thread-index:in-reply-to:mime-version:content-type:
-	content-transfer-encoding:x-originalarrivaltime;
-	b=rChE4SCnwtWaZpjhovkiXDKfDiVNdRRvsadSGG9S9bgvOexn/9/5JjEQx1pOR7Nb
-Received: from SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.9]) by SNV-EXBH01.ds.corp.yahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
-	 Sun, 31 May 2009 23:03:15 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon,  1 Jun 2009 06:03:15 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 11:33:13 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: <co...@hadoop.apache.org>
-Message-ID: <C6...@yahoo-inc.com>
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnifqWrLG6N7GAk7kqy9QalVWfegQ==
-In-Reply-To: <59...@web111010.mail.gq1.yahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
-	charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 06:03:15.0462 (UTC) FILETIME=[A7231260:01C9E27E]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-
-No you cannot raise this event yourself, this event is generated internally
-by the framework. 
-
-I am guessing that what you probably want is to have a chain of MapReduce
-Jobs where the output of one is automatically fed as input to another.  You
-can look at these classes: JobControl and ChainMapper/ChainReducer.
-
-Jothi
-
-On 6/1/09 11:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Thanks a lot for your explanation, Jothi.
-> 
-> So is this event generated by hadoop framework? Is there any API in mapper to
-> fire this event? Actually, I am thinking to implement a mapper that will emit
-> some <key, value> pairs, then fire this event to let the reducer works, the
-> same mapper task then emit some other <key, value> pairs and repeat. Do you
-> think is this logic feasible by current API?
-> 
-> Thanks,
-> Jianmin
-> 
-> 
-> 
-> 
-> 
-> ________________________________
-> From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-> To: core-user@hadoop.apache.org
-> Sent: Monday, June 1, 2009 12:26:31 PM
-> Subject: Re: question about when shuffle/sort start working
-> 
-> When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-> fetch map outputs for a given map only on the receipt of such events.
-> 
-> Jothi
-> 
-> 
-> On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-> 
->> Hi, 
->> I am being confused by the protocol between mapper and reducer. When mapper
->> emitting the (key,value) pair done, is there any signal the mapper send out
->> to
->> hadoop framework in protocol to indicate that map is done and the
->> shuffle/sort
->> can begin for reducer? If there is no this signal in protocol, when the
->> framework begin the shuffle/sort?
->> 
->> Thanks,
->> Jianmin
->> 
->> 
->> 
->>      
-> 
-> 
->       
-
-

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2 b/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2
deleted file mode 100644
index be2cb87..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/english.cp500.txt b/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
deleted file mode 100644
index 734aab6..0000000
--- a/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
+++ /dev/null
@@ -1 +0,0 @@
-@@@@@@@@@ɕ�����⣁����@�����@
%
%@@@@@@@@@@@@ȁ������@
%
%@@@@@@@@@@@@Ǚ������@
%
%@@@@@@@@@@@@⤗����@���@��������@
%
%@@@@@@@@@@@@م�������@
%
%
%
%
%@@@@@@@@@@@@م�����@�����
%@@@@@@@@@@@@恙�������k@��������@���@�����������
%@@@@@@@@@@@@ז���@⨢����
%@@@@@@@@@@@@⨢����@�
%@@@@@@@@@@@@���@⨢���@�@م������
%@@@@@@@@@@@@━��@��������@��������@������
%@@@@@@@@@@@@ŧ�����@���������@���@������@��������
%@@@@@@@@@@@@¤������@ׁ������
%
%
%@@@@@@@@@@@@@���@⨢����@@n@料���������@@n@ɕ�����⣁����@�����@@n@
%@@@@@@@@@@@@@@@@@@ȁ������@@n@@
%
%@@@@@@@@@@@@@@@@@
 @���@ɕ�����⣁����@�����@���@ŧ�����@@
%@@@@@@@@@@@@@@@@@@@@@@@@י�����@�������™����@P@��������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@֥������@@@j@@@ƅ������@P@…������@@@@j@@@◅�����������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@j@@@ׅ���������@ā��@@@@j@@
%
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@Ӆ���@����
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@���@ɕ�����⣁����@�����@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@M����]
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ㅃ������@��������@���@������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ɓ���@P@��������@������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@⨢���@�����������@�
 ������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ȁ������@�������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@⤗����
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@������`�����@��a������@���@ť��������@M�����]
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Dž�@�����V@م����V
%
%
%@@@@@@@@@@@@@@@@@@㈅@���@ɕ�����⣁����@ז���@���@���@���@ɕ�����⣁����@ז���@
%@@@@@@@@@@@@@@@@@@���@����@����@���������@��@�@��������@����������@����@
%@@@@@@@@@@@@@@@@@@���������@����@��@���������@����@��@с�����@�k@����K@֕@��@
%@@@@@@@@@@@@@@@@@@�����@���@���������@����@��@����������k@��
 �@���@��@������@
%@@@@@@@@@@@@@@@@@@�����@�����@��������@��������@����@���K@@薤@���@������@���@
%@@@@@@@@@@@@@@@@@@��������@��@��@��`���������@�����@�������@���@¤������@
%@@@@@@@@@@@@@@@@@@ׁ������K
%@@@@@@@@@@@@@@@@@@@@@@@@ɕ���������@��@�����@���@料��������@���������
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@ȉ��������
%
%
%@@@@@@@@@@@@@@@@@@@@⤗����@���@����@��`���@���@��`���@����@��@��������@
%@@@@@@@@@@@@@@@@@@@@������������@
%@@@@@@@@@@@@@@@@@@@@ؤ���@���������@��@�@��������@�������@
%@@@@@@@@@@@@@@@@@@@@Ł��@������
 ������@�������@
%@@@@@@@@@@@@@@@@@@@@ą������@���@��������@�����������@���@��������@�������@
%@@@@@@@@@@@@@@@@@@@@������������@
%@@@@@@@@@@@@@@@@@@㈅@���@���@ɕ�����⣁����V@�����9@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@��������@��@����������@���@�����@��������@�������@����@������@
%@@@@@@@@@@@@@@@@@@��������@���@���������@�����������@���@�@�����@����@
%@@@@@@@@@@@@@@@@@@����������K@䢅@��@���@����`���@ԅ��������@Ö������@�����@
%@@@@@@@@@@@@@@@@@@ą����@M����]k@Ö����
 ��@�����@��������@M���]k@��@�������@
%@@@@@@@@@@@@@@@@@@����������@���@�����@��������@���@���������@������������K@扣�@
%@@@@@@@@@@@@@@@@@@��@��������@������������k@��@��@��@���������@�����������@���@
%@@@@@@@@@@@@@@@@@@��������@�����������@���@�������
%
%@@@@@@@@@@@@@@@@@@扣�@�������`����@���@ז�����V@���@���������@����������@���@
%@@@@@@@@@@@@@@@@@@���@����元9k@���@����@����������@��`���@���������@
%@@@@@@@@@@@@@@@@@@���������������@ɕ�����⣁����@��
 ���@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@������@�����������@�����a�����������@��������K@ɣ@��@���@
%@@@@@@@@@@@@@@@@@@�������@����������@��@���@���@ɕ�����⣁����@�����@���@
%@@@@@@@@@@@@@@@@@@ŧ�������������@�@������@��@�����������@���@�����K@���@
%@@@@@@@@@@@@@@@@@@�������@������@���@���@��9@��@Ӊ���V@���������@������@M��]k@
%@@@@@@@@@@@@@@@@@@���@��@����@�������O
%
%@@@@@@@@@@@@@@@@@@ɕ�����⣁����@�����@������������@���@���@�����@�����������@
%@@@@@@@@@@@@@@
 @@@@������@������������@����������@��`���@�����@��@���@�����@
%@@@@@@@@@@@@@@@@@@�����������@����������K@Ɩ�@�����@����@���������k@���@�����@
%@@@@@@@@@@@@@@@@@@���@ŧ�����@��������@������@�����������@����@���@�����������@@
%@@@@@@@@@@@@@@@@@@�@k@���@���@ɕ�����⣁����@�����@���@�K�@�ȩk@�����@����������@
%@@@@@@@@@@@@@@@@@@����@����@�����K@¨@��������@���@�����@�����k@���@�����@���@
%@@@@@@@@@@@@@@@@@@ŧ�����@������@���@������@��@�������@���k@������
 �@����@������@
%@@@@@@@@@@@@@@@@@@���@��������@������������K
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����@��������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@ą������@�����������@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@�`@��@�`����@���@������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��`���@ז�����@����������@����@����元9@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@����@��@��@��������@�������@��@��@��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@������������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@
 @@@@@@@@@@@@@@@@@g@⤗������@��@���@��9@M��K�@��@��K�]@���@Ӊ���@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�������������@����@م�@ȁ�@M����@��@�]@���@����@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ӊ���@M����@�]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@ȁ������@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@ą������@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@֕�@��@���@��`���@�K�@�ȩ@ז�����@���@����������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@���@��@�����@���@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@�����@��@���@��@���@�ȩ@���@��
 �����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ɩ��@���`�@�������@�����@M���@�ȩ@M��`���]^@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����@�ȩ@M��`���]]^@֕�@���@����@M��@�ȩ@
%M��`���]]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@㈙��@����@����@����@���@��@��@�K���@��@��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ĥ��@�������@䓣�����@����@����������@M��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@���@��������]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ĥ��@������@ţ������@��a���a����@Ԃ��@����������
%@@@
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ɩ��@���k@���@������@�����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@㦖@����`����@�����@����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�������@����@��������@���@��������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�����@�������◁����V@���@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@◁��Ԗ���V
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�����@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@
%
%
%
%@@@@@@@@@@@@@@@@@@@@@�@���@��@���Ã������@���������@�����@��@�
 ��@ƅ������@��k@
%@@@@@@@@@@@@@@@@@@����@���������@��@⣁�����@ׅ���������@ť��������@Ö���������@
%@@@@@@@@@@@@@@@@@@���@��@��@���������@��@���K����K���K
%@@@@@@@@@@@@@@@@@@@�}��@����@��@����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ł��@����@��@���@���@�������@���@����K
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@��@���
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@م�����@�@�����
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@��@����@��@��
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�`���`����`���
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@ؤ�����@�������
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ӆ���@����
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@㉔�������@��������
 �
%
%@@@@@@@@@@@@@@@@@@@@@@@@����@��������@��������@��@����@���@�������
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@剢��@剢��Á��
%
%
%
%
%@@�`����@����@����י���@����@����ĉ��@����⁥�@��@���K����K�������@���@י�����@
%@@Ö�����@ㅙ��@��@���@���@ƅ���
%
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr b/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
deleted file mode 100644
index 37d73de..0000000
--- a/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
+++ /dev/null
@@ -1,16 +0,0 @@
-ENVI
-description = {
-  GEO-TIFF File Imported into ENVI [Fri May 25 14:06:23 2012]}
-samples = 2400
-lines   = 2400
-bands   = 7
-header offset = 0
-file type = ENVI Standard
-data type = 2
-interleave = bip
-sensor type = Unknown
-byte order = 0
-map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}
-projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}
-coordinate system string = {PROJCS["Sinusoidal",GEOGCS["GCS_ELLIPSE_BASED_1",DATUM["D_ELLIPSE_BASED_1",SPHEROID["S_ELLIPSE_BASED_1",6371007.181,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],UNIT["Meter",1.0]]}
-wavelength units = Unknown

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/footnotes.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/footnotes.docx b/tika-test-resources/src/test/resources/test-documents/footnotes.docx
deleted file mode 100644
index db4386c..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/footnotes.docx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2 b/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2
deleted file mode 100644
index 7ab3416..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/headerPic.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/headerPic.docx b/tika-test-resources/src/test/resources/test-documents/headerPic.docx
deleted file mode 100644
index 01072e5..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/headerPic.docx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/headers.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/headers.mbox b/tika-test-resources/src/test/resources/test-documents/headers.mbox
deleted file mode 100644
index d607bc9..0000000
--- a/tika-test-resources/src/test/resources/test-documents/headers.mbox
+++ /dev/null
@@ -1,7 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-Return-Path: <na...@domain.com>
-Subject: subject
-From: <au...@domain.com>
-Date: Tue, 9 Jun 2009 23:58:45 -0400
-
-Test content

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/jxl.xls
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/jxl.xls b/tika-test-resources/src/test/resources/test-documents/jxl.xls
deleted file mode 100644
index 569e21d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/jxl.xls and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/moby.zip
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/moby.zip b/tika-test-resources/src/test/resources/test-documents/moby.zip
deleted file mode 100644
index 71dac43..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/moby.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml b/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
deleted file mode 100644
index 38a7731..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">main_content</write>
-    <!-- auto detection wasn't working for some reason; add content-type as
-        is to trigger mock on the embedded -->
-    <embedded filename="embed1.xml" content-type="application/mock+xml">
-        &lt;mock&gt;
-            &lt;metadata action=&quot;add&quot; name=&quot;author&quot;&gt;embeddedAuthor&lt;/metadata&gt;
-            &lt;write element="p"&gt;some_embedded_content&lt;/write&gt;
-        &lt;/mock&gt;
-    </embedded>
-    <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/example.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/example.xml b/tika-test-resources/src/test/resources/test-documents/mock/example.xml
deleted file mode 100644
index df1a762..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/example.xml
+++ /dev/null
@@ -1,51 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <!-- this file offers all of the options as documentation
-    Parsing should stop at an IOException, of course
-    -->
-
-    <!-- action can be "add" or "set" -->
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <!-- element is the name of the sax event to write, p=paragraph
-        if the element is not specified, the default is <p> -->
-    <write element="p">some content</write>
-    <!-- write something to System.out -->
-    <print_out>writing to System.out</print_out>
-    <!-- write something to System.err -->
-    <print_err>writing to System.err</print_err>
-    <!-- hang
-        millis: how many milliseconds to pause.  The actual hang time will probably
-            be a bit longer than the value specified.        heavy: whether or not the hang should do something computationally expensive.
-            If the value is false, this just does a Thread.sleep(millis).
-            This attribute is optional, with default of heavy=false.
-        pulse_millis: (required if "heavy" is true), how often to check to see
-            whether the thread was interrupted or that the total hang time exceeded the millis
-        interruptible: whether or not the parser will check to see if its thread
-            has been interrupted; this attribute is optional with default of true
-    -->
-    <hang millis="100" heavy="true" pulse_millis="10" interruptible="true" />
-    <!-- throw an exception or error; optionally include a message or not -->
-    <throw class="java.io.IOException">not another IOException</throw>
-    <!-- perform a genuine OutOfMemoryError -->
-    <oom/>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml b/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
deleted file mode 100644
index 6f090d4..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <throw class="java.lang.OutOfMemoryError">not another oom</throw>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml b/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
deleted file mode 100644
index df5bbfd..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <hang millis="3000" heavy="true" pulse_millis="100" />
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml b/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
deleted file mode 100644
index e3656a8..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
+++ /dev/null
@@ -1,26 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Geoffrey Chaucer</metadata>
-    <write element="p">Whan that Aprille with his shoures soote</write>
-    <write>The droghte of Marche hath perced to the roote,</write>
-    <write>And bathed every veyne in swich licour,</write>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
deleted file mode 100644
index 4561c3a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
deleted file mode 100644
index 33f3f83..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <throw class="java.lang.NullPointerException"/>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml b/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
deleted file mode 100644
index 168751a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <oom/>
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
deleted file mode 100644
index 991cdc2..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <hang millis="3000" heavy="false"  />
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
deleted file mode 100644
index 8d84ead..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <hang millis="3000" heavy="false" interruptible="true" />
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
deleted file mode 100644
index 7994095..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<mock>
-    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
-    <write element="p">some content</write>
-    <hang millis="3000" heavy="false" interruptible="false" />
-</mock>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/multiline.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/multiline.mbox b/tika-test-resources/src/test/resources/test-documents/multiline.mbox
deleted file mode 100644
index fff7dfb..0000000
--- a/tika-test-resources/src/test/resources/test-documents/multiline.mbox
+++ /dev/null
@@ -1,5 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-Received: from xxx
-    by xxx with xxx; date
-
-Test content

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/pictures.ppt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/pictures.ppt b/tika-test-resources/src/test/resources/test-documents/pictures.ppt
deleted file mode 100644
index 9f6ce6d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/pictures.ppt and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protect.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protect.xlsx b/tika-test-resources/src/test/resources/test-documents/protect.xlsx
deleted file mode 100644
index 1767b14..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protect.xlsx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx b/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx
deleted file mode 100644
index 1767b14..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx b/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx
deleted file mode 100644
index 09f6a77..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/quoted.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/quoted.mbox b/tika-test-resources/src/test/resources/test-documents/quoted.mbox
deleted file mode 100644
index 05d0fd8..0000000
--- a/tika-test-resources/src/test/resources/test-documents/quoted.mbox
+++ /dev/null
@@ -1,4 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-
-Test content
-> quoted stuff
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/resume.html
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/resume.html b/tika-test-resources/src/test/resources/test-documents/resume.html
deleted file mode 100644
index 1bd747a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/resume.html
+++ /dev/null
@@ -1,73 +0,0 @@
-
-
-	<div class="js-helper">
-	<style type="text/css">#style_13209008630000000884_BODY{background-color:#FFFFFF;color:#000000;MARGIN:0px 1px;font-family:Tahoma,Arial,Verdana,Sans-Serif}#style_13209008630000000884 TD{font-size:13px;font-family:Tahoma,Arial,Verdana,Sans-Serif;vertical-align:top}#style_13209008630000000884 CAPTION{font-size:13px;font-weight:bold;text-align:left}#style_13209008630000000884 TR.style_13209008630000000884thead TD{font-weight:bold;text-align:center; padding-bottom:6px;padding-top:6px;padding-left:2px;padding-right:2px}#style_13209008630000000884 H1{font-size:24px;margin-bottom:15px;margin-top:5px;display:block;font-weight:normal;}#style_13209008630000000884 H2{font-size:22px;margin-bottom:5px;margin-top:5px;display:block;font-weight:normal;letter-spacing:1px}#style_13209008630000000884 H1.style_13209008630000000884in, #style_13209008630000000884 H2.style_13209008630000000884in, #style_13209008630000000884 H3.style_13209008630000000884in{font-size:100%;margin-bottom:0px;margin-top:0px;di
 splay:inline;}#style_13209008630000000884 A, #style_13209008630000000884 A.style_13209008630000000884notvisited:visited, #style_13209008630000000884 .style_13209008630000000884notvisited A:visited, #style_13209008630000000884 .style_13209008630000000884menu A:visited{color:#00418F;text-decoration:none}#style_13209008630000000884 A:visited{color:#6699CC;text-decoration:none;}#style_13209008630000000884 A:hover, #style_13209008630000000884 A.style_13209008630000000884notvisited:hover, #style_13209008630000000884 .style_13209008630000000884notvisited A:hover, #style_13209008630000000884 .style_13209008630000000884menu A:hover{color:#990000;text-decoration:underline}#style_13209008630000000884 .style_13209008630000000884bold, #style_13209008630000000884 .style_13209008630000000884bold H1{font-weight:bold}#style_13209008630000000884 .style_13209008630000000884u{text-decoration:underline}#style_13209008630000000884 .style_13209008630000000884gray, #style_13209008630000000884 A.style_13209
 008630000000884gray:visited, #style_13209008630000000884 LEGEND{color:#7A7A7A}#style_13209008630000000884 .style_13209008630000000884red, #style_13209008630000000884 A.style_13209008630000000884red:visited{color:#C2311A}#style_13209008630000000884 EM, #style_13209008630000000884 .style_13209008630000000884imp, #style_13209008630000000884 .style_13209008630000000884field_warning{color:#C2311A;font-weight:bold;font-style:normal}#style_13209008630000000884 TABLE.style_13209008630000000884bl_table TR TD{padding:2px; padding-left:10px}#style_13209008630000000884 TD.style_13209008630000000884bl_row_name{color:#555; width:10%}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD, #style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD, #style_13209008630000000884 TD.style_13209008630000000884serverdark, #style_1320900863
 0000000884 TR.style_13209008630000000884serverdark TD{text-align:center;padding-bottom:3px;padding-top:3px;padding-left:1px;padding-right:1px;font-weight:bold}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A:visited, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A:hover, #style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD, #style_13209008630000000884 TD.style_13209008630000000884resumedark A, #style_13209008630000000884 TD.style_13209008630000000884resumedark A:visited, #style_13209008630000000884 TD.style_13209008630000000884resumedark A:hover, #style_13209008630000000884 TD.style_13209008630000000884serverdark, #style_13209008630000000884 
 TR.style_13209008630000000884serverdark TD, #style_13209008630000000884 TD.style_13209008630000000884serverdark A, #style_13209008630000000884 TD.style_13209008630000000884serverdark A:visited, #style_13209008630000000884 TD.style_13209008630000000884serverdark A:hover{color:#000000;}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD{background-color:#FFDDBB;}#style_13209008630000000884 TD.style_13209008630000000884vacancylight, #style_13209008630000000884 TR.style_13209008630000000884vacancylight TD{background-color:#FFF5EC}#style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD{background-color:#D3E9E9;}#style_13209008630000000884 TD.style_13209008630000000884resumelight, #style_13209008630000000884 TR.style_13209008630000000884resumelight TD{background-color:#ECF8F7}#style_13209008630000000884 TD.style_13209
 008630000000884serverdark, #style_13209008630000000884 TR.style_13209008630000000884serverdark TD{background-color:#ABC2D5;}#style_13209008630000000884 TR.style_13209008630000000884serverlight TD, #style_13209008630000000884 TD.style_13209008630000000884serverlight{background-color:#E2EBF5}#style_13209008630000000884 TD.style_13209008630000000884blankheader1{font-size:24px; padding:10px}#style_13209008630000000884 TD.style_13209008630000000884blankheader2{font-size:22px; padding:10px}#style_13209008630000000884 TABLE.style_13209008630000000884resumelist TR.thead TD{background-color:#ABC2D5;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist TR.thead TD{background-color:#ABC2D5;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TR.thead TD{background-color:#DBDBDB;}#style_13209008630000000884 TABLE TR.style_13209008630000000884wr TD{background-color:#FFFFFF}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD{bord
 er-bottom:#DBDBDB 1px solid}#style_13209008630000000884 .style_13209008630000000884list TR TD{background-color:#E2EBF5;padding:5px}#style_13209008630000000884 .style_13209008630000000884list TR.thead TD{background-color:#ABC2D5;color:#555555;text-align:center; padding-bottom:8px;padding-top:8px;padding-left:1px;padding-right:1px;font-weight:bold;}#style_13209008630000000884 .style_13209008630000000884list TR.wr TD{background-color:#F3F7FB}#style_13209008630000000884 A.style_13209008630000000884list_details, #style_13209008630000000884 A.style_13209008630000000884list_details:visited, #style_13209008630000000884 A.style_13209008630000000884list_details:hover{color:#7A7A7A;text-decoration:none;line-height:120%}#style_13209008630000000884 TD.style_13209008630000000884cell, #style_13209008630000000884 TD.style_13209008630000000884c{padding-top:3px;padding-left:5px;padding-right:5px}#style_13209008630000000884 BIG{font-size:24px}#style_13209008630000000884 .style_13209008630000000884smal
 l, #style_13209008630000000884 SMALL{font-size:85%}#style_13209008630000000884 UL{margin-left:25px;margin-bottom:0px}#style_13209008630000000884 TD.style_13209008630000000884small, #style_13209008630000000884 .style_13209008630000000884verysmall, #style_13209008630000000884 .style_13209008630000000884verysmall INPUT, #style_13209008630000000884 .style_13209008630000000884verysmall SELECT{font-size:11px}#style_13209008630000000884 DIV.style_13209008630000000884localmenu{padding-top:10px;margin-bottom:15px;}#style_13209008630000000884 DIV.style_13209008630000000884localmenu A, #style_13209008630000000884 DIV.style_13209008630000000884localmenu A:visited{text-decoration:underline;font-weight:bold}#style_13209008630000000884 DIV.style_13209008630000000884comment{font-size:85%; background-color:#DDFFDD; padding:4px; border:1px solid #CCC;cursor:default;}#style_13209008630000000884 HR{color:#ABC2D5;background-color:#ABC2D5;height:1px;border:0px solid #ABC2D5}#style_13209008630000000884 DI
 V.style_13209008630000000884dotsline{font-size:1px; margin-top:4px; margin-bottom:5px; border-bottom:#BACBD7 1px dotted}#style_13209008630000000884 TABLE.style_13209008630000000884rctable TR TD{background-color:#E5EDF7;}#style_13209008630000000884 TD.style_13209008630000000884rc1{padding-top:10px; padding-left:10px;}#style_13209008630000000884 TD.style_13209008630000000884rc2{font-size:1px; width:10px;}#style_13209008630000000884 TD.style_13209008630000000884rc3{height:10px; font-size:1px;}#style_13209008630000000884 TD.style_13209008630000000884rc4{height:10px; font-size:1px;}#style_13209008630000000884 SPAN.style_13209008630000000884super{color:#003398;font-size:150%}#style_13209008630000000884 SPAN.style_13209008630000000884job{color:#FF0000;font-size:150%}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button{background-color:#99cc00; margin:0px 5px 3px 0px;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail
  TD TABLE.to_site_button TD{background-color:#99cc00; font-weight:normal; color:#ffffff; border-bottom:0px; padding-top:6px; padding-right:7px; padding-bottom:6px; padding-left:7px; vertical-align:middle; text-align:center;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button TD A, #style_13209008630000000884 TABLE.style_13209008630000000884to_site_button TD A:visited{color:#ffffff; text-decoration:none; font-weight:normal;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button TD A:hover{color:#ffffff; text-decoration:underline; font-weight:normal;}#style_13209008630000000884 .style_13209008630000000884row{clear:left; padding-bottom:4px;}#style_13209008630000000884 .style_13209008630000000884row2{margin-bottom:8px;}#style_13209008630000000884 .style_13209008630000000884col1{float:left; width:140px; color:#555555; margin-right:-145px;}#style_13209008630000000884 .style_13209008630000000884c
 ol2{margin-left:145px;}#style_13209008630000000884 DIV.style_13209008630000000884resume_rightcol{float:right; width:280px; margin:0px 0px 10px 30px;}#style_13209008630000000884 DIV.style_13209008630000000884blankheader1{font-size:190%;}
-</style>
-	<div id="style_13209008630000000884" class="mr_read__body">
-		<base target="_self" href="http://e.mail.ru/cgi-bin/" />
-		
-			<div id="style_13209008630000000884_BODY">
-
-
-
-<style type="text/css" ></style>
-
-
-<table width="100%" cellspacing="0" cellpadding="0" height="100%" border="0" >
-<tr ><td >
-
-</td></tr>
-<tr ><td style="padding:5px" height="100%" >
-Здравствуйте, !<br >
-<br >
-Предлагаем Вам ознакомиться со списком зарегистрированных компаний, представители которых просмотрели Ваше резюме за последние сутки.<br >
-<br >
-<li ><a target="_blank" href="/cgi-bin/link?check=1&cnf=710139&url=http%3A%2F%2;0,0" >Компании, просмотревшие резюме № .</a> Новые: <b >1.</b></li><br >
-<br >
-Эти сведения предоставляются Вам исключительно для информации. Вы можете оперативно отслеживать, какие именно компании нашли в базе данных Superjob Ваше резюме и заинтересовались им.<br >
-<br >
-Если Ваше резюме размещено в закрытом доступе, то его могут просматривать только те работодатели, которым Вы отправили его самостоятельно.<br >
-Историю отправки своего резюме Вы можете посмотреть по ссылке «История рассылки резюме».<br >
-<br >
-<br >
-<b >Внимание!</b><br >
-В процессе поиска работы Вы можете столкнуться с такими предложениями работодателей или кадровых агентств, в которых Вас будут просить внести оплату (за предварительное обучение, за оформление документов, за оформление обязательной страховки, на закупку первой партии продукции компании, предназначенной для продажи и т.п.) или предоставить отсканированные копии документов (паспорта, военного билета, трудовой книжки, водительских прав, пенсионного удостоверния и т.п.) для якобы предварительного оформления или подтверждения данных, указа
 нных в Вашем резюме.<br >
-Это один из признаков мошенничества! Мы рекомендуем Вам очень осторожно относиться к таким предложениям и по возможности избегать собеседований с подобными работодателями.<br >
-<br >
-Также мы настоятельно не рекомендуем отправлять платные SMS-сообщения на короткие номера для получения контактов или другой информации о вакансии или же для получения результатов тестирования. С организациями, которые оказывают подобные услуги, мы не сотрудничаем и предупреждаем, что это тоже один из приемов мошенничества.<br >
-<br >
-<br >
-<em >x</em> <a target="_blank" href="/cgi-bin/link?check=1&cnf=8d972a&url=http%3A%2F%2Fwww.sup;0,0" >Отключить уведомления о новых просмотрах моих резюме</a><br >
-<br >
-По ссылкам в этом письме можно войти в систему без ввода пароля.
-<br ><br >
-</td>
-</tr>
-<tr >
-<td >
-<span class="style_13209008630000000884noprint" ><br ><br >Если у Вас есть пожелания и идеи по улучшению сервиса Superjob, пожалуйста, <a target="_blank" href="/cgi-bin/link?check=1;0,0" >напишите нам</a>.<br ><br ></span>
-<table width="100%" cellspacing="0" cellpadding="10" border="0" class="style_13209008630000000884noprint" >
-<tr ><td align="center" style="border-top:1px solid #BACBD7;" >
-<a target="_blank" href="/cgi-bin/link?check=1&cnf=8fa2f9&url=http%3A%2F%2Fwww.;0,0" ><big >Superjob — Работа должна доставлять удовольствие!</big></a>
-</td></tr>
-</table>
-<table width="100%" cellspacing="1" cellpadding="0" border="0" class="style_13209008630000000884noprint" >
-<tr ><td align="center" style="padding:5px" >
-<span style="color:#999999;font-size:8pt;" >Письмо отправлено: xx.xx.xxxx xx:xx:xx</span>
-</td></tr>
-</table>
-
-</td></tr>
-</table>
-
-
-
-</div>
-			
-		
-		<base target="_self" href="http://e.mail.ru/cgi-bin/" />
-	</div>
-</div>
-
-
-

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/rsstest.rss
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/rsstest.rss b/tika-test-resources/src/test/resources/test-documents/rsstest.rss
deleted file mode 100644
index 758f6a1..0000000
--- a/tika-test-resources/src/test/resources/test-documents/rsstest.rss
+++ /dev/null
@@ -1,36 +0,0 @@
-<?xml version="1.0" encoding="ISO-8859-1" ?>
-<!--
-	Licensed to the Apache Software Foundation (ASF) under one or more
-	contributor license agreements.  See the NOTICE file distributed with
-	this work for additional information regarding copyright ownership.
-	The ASF licenses this file to You under the Apache License, Version 2.0
-	(the "License"); you may not use this file except in compliance with
-	the License.  You may obtain a copy of the License at
-	
-	http://www.apache.org/licenses/LICENSE-2.0
-	
-	Unless required by applicable law or agreed to in writing, software
-	distributed under the License is distributed on an "AS IS" BASIS,
-	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	See the License for the specific language governing permissions and
-	limitations under the License.
--->
-<rss version="0.91">
-    <channel>
-      <title>TestChannel</title>
-      <link>http://test.channel.com/</link> 
-      <description>Sample RSS File for Junit test</description> 
-      <language>en-us</language>
-      
-      <item>
-        <title>Home Page of Chris Mattmann</title>
-        <link>http://www-scf.usc.edu/~mattmann/</link>
-        <description>Chris Mattmann's home page</description>
-      </item>
-      <item>
-        <title>Awesome Open Source Search Engine</title> 
-        <link>http://www.nutch.org/</link> 
-        <description>Yup, that's what it is</description> 
-      </item>
-   </channel>
-</rss>

http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt b/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
deleted file mode 100644
index 050cc87..0000000
--- a/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-�������, � ��㤥��� ������ ����,
-� �� ���� ��襫; �� ᨫ�� ��஧.
-����, ����������� �������� � ����
-��蠤��, ������ 墮���� ���.
-�, ����� �����, � ᯮ����⢨� 稭���,
-��蠤�� ����� ��� 㧤�� �㦨箪