You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/19 21:25:01 UTC
[22/52] [partial] tika git commit: move test files to parser-modules
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif b/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
deleted file mode 100644
index 64974af..0000000
--- a/tika-test-resources/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
+++ /dev/null
@@ -1,84 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
- <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
- <Entry_ID>005f3222-7548-11e2-851e-00c0f03d5b7c</Entry_ID>
- <Entry_Title>Carbon Isotopic Values of Alkanes Extracted from Paleosols</Entry_Title>
-
- <Parameters>
- <Category>EARTH SCIENCE</Category>
- <Topic>PALEOCLIMATE</Topic>
- <Term>LAND RECORDS</Term>
- <Variable_Level_1>PALEOSOLS</Variable_Level_1>
- </Parameters>
- <Parameters>
- <Category>EARTH SCIENCE</Category>
- <Topic>LAND SURFACE</Topic>
- <Term>SOILS</Term>
- <Variable_Level_1>CARBON</Variable_Level_1>
- </Parameters>
- <Parameters>
- <Category>EARTH SCIENCE</Category>
- <Topic>PALEOCLIMATE</Topic>
- <Term>LAND RECORDS</Term>
- <Variable_Level_1>ISOTOPES</Variable_Level_1>
- </Parameters>
- <Parameters>
- <Category>EARTH SCIENCE</Category>
- <Topic>BIOSPHERE</Topic>
- <Term>ECOLOGICAL DYNAMICS</Term>
- <Variable_Level_1>ECOSYSTEM FUNCTIONS</Variable_Level_1>
- <Variable_Level_2>BIOGEOCHEMICAL CYCLES</Variable_Level_2>
- </Parameters>
- <Parameters>
- <Category>EARTH SCIENCE</Category>
- <Topic>SOLID EARTH</Topic>
- <Term>GEOCHEMISTRY</Term>
- <Variable_Level_1>BIOGEOCHEMICAL PROCESSES</Variable_Level_1>
- </Parameters>
-
-
- <Spatial_Coverage>
- <Southernmost_Latitude>66.56</Southernmost_Latitude>
- <Northernmost_Latitude>90</Northernmost_Latitude>
- <Westernmost_Longitude>-180</Westernmost_Longitude>
- <Easternmost_Longitude>180</Easternmost_Longitude>
- </Spatial_Coverage>
-
- <Data_Center>
- <Data_Center_Name>
- <Short_Name>ACADIS</Short_Name>
- <Long_Name>Advanced Cooperative Arctic Data and Information Service</Long_Name>
- </Data_Center_Name>
- <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
- <Personnel>
- <Role>DATA CENTER CONTACT</Role>
- <First_Name>ACADIS</First_Name>
- <Last_Name>User Services</Last_Name>
- <Contact_Address>
- <Address>NCAR/CISL</Address>
- <Address>P.O. Box 3000</Address>
- <City>Boulder</City>
- <Province_or_State>CO</Province_or_State>
- <Postal_Code>80307</Postal_Code>
- <Country>USA</Country>
- </Contact_Address>
- </Personnel>
- </Data_Center>
-
- <Summary>
- <Abstract>Dataset consists of compound specific carbon isotopic values of alkanes
-extracted from paleosols. Values represent the mean of duplicate
-measurements.</Abstract>
- </Summary>
-
- <Related_URL>
- <URL_Content_Type>
- <Type>GET DATA</Type>
- </URL_Content_Type>
- <URL>http://www.aoncadis.org/dataset/id/005f3222-7548-11e2-851e-00c0f03d5b7c.html</URL>
- <Description>Data Center top-level access page for this resource</Description>
- </Related_URL>
-
- <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
- <Metadata_Version>9.8.4</Metadata_Version>
- <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
- </DIF>
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM
deleted file mode 100644
index 9498f3f..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCL.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM
deleted file mode 100644
index 64be97d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMJPCLE.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM b/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM
deleted file mode 100644
index 171943d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/IMTCEN.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/admin.chm
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/admin.chm b/tika-test-resources/src/test/resources/test-documents/chm/admin.chm
deleted file mode 100644
index 6dadaae..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/admin.chm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM b/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM
deleted file mode 100644
index 9dd7b82..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/cmak_ops.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM b/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM
deleted file mode 100644
index aebeddf..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/comexp.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM b/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM
deleted file mode 100644
index a023143..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/gpedit.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM b/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM
deleted file mode 100644
index 18c3cf8..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/tcpip.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM b/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM
deleted file mode 100644
index 8856ebd..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/chm/wmicontrol.CHM and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/complex.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/complex.mbox b/tika-test-resources/src/test/resources/test-documents/complex.mbox
deleted file mode 100644
index 2aa4828..0000000
--- a/tika-test-resources/src/test/resources/test-documents/complex.mbox
+++ /dev/null
@@ -1,291 +0,0 @@
-From core-user-return-14700-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 04:28:28 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 19921 invoked from network); 1 Jun 2009 04:28:28 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 04:28:28 -0000
-Received: (qmail 84995 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 84895 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 84885 invoked by uid 99); 1 Jun 2009 04:28:38 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:38 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
- tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [69.147.107.21] (HELO mrout2-b.corp.re1.yahoo.com) (69.147.107.21)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:26 +0000
-Received: from SNV-EXPF01.ds.corp.yahoo.com (snv-expf01.ds.corp.yahoo.com [207.126.227.250])
- by mrout2-b.corp.re1.yahoo.com (8.13.8/8.13.8/y.out) with ESMTP id n514QYA6099963
- for <co...@hadoop.apache.org>; Sun, 31 May 2009 21:26:35 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=yahoo-inc.com; c=nofws; q=dns;
- h=received:user-agent:date:subject:from:to:message-id:
- thread-topic:thread-index:in-reply-to:mime-version:content-type:
- content-transfer-encoding:x-originalarrivaltime;
- b=YVtSNdgjeeSBS1yY3XDolul49i+HrgNG7QszMo9LzGnrwejjgsl5+iUM6EiQgEpV
-Received: from SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.9]) by SNV-EXPF01.ds.corp.yahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
- Sun, 31 May 2009 21:26:34 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon, 1 Jun 2009 04:26:33 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 09:56:31 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: <co...@hadoop.apache.org>
-Message-ID: <C6...@yahoo-inc.com>
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnicSNoBw19cMU8UEaXwAdZ1YYhuw==
-In-Reply-To: <44...@web111005.mail.gq1.yahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
- charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 04:26:34.0501 (UTC) FILETIME=[257EAB50:01C9E271]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Jothi
-
-
-On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Hi,
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
->
-> Thanks,
-> Jianmin
->
->
->
->
-
-
-From core-user-return-14701-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 05:31:14 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 38243 invoked from network); 1 Jun 2009 05:31:14 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 05:31:14 -0000
-Received: (qmail 15621 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 15557 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 15547 invoked by uid 99); 1 Jun 2009 05:31:24 -0000
-Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 05:31:24 +0000
-X-ASF-Spam-Status: No, hits=2.2 required=10.0
- tests=HTML_MESSAGE,SPF_PASS
-X-Spam-Check-By: apache.org
-Received-SPF: pass (nike.apache.org: local policy)
-Received: from [68.142.237.94] (HELO n9.bullet.re3.yahoo.com) (68.142.237.94)
- by apache.org (qpsmtpd/0.29) with SMTP; Mon, 01 Jun 2009 05:31:11 +0000
-Received: from [68.142.237.88] by n9.bullet.re3.yahoo.com with NNFMP; 01 Jun 2009 05:30:50 -0000
-Received: from [67.195.9.82] by t4.bullet.re3.yahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [67.195.9.99] by t2.bullet.mail.gq1.yahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [127.0.0.1] by omp103.mail.gq1.yahoo.com with NNFMP; 01 Jun 2009 05:28:01 -0000
-X-Yahoo-Newman-Property: ymail-3
-X-Yahoo-Newman-Id: 796121.97519.bm@omp103.mail.gq1.yahoo.com
-Received: (qmail 35264 invoked by uid 60001); 1 Jun 2009 05:30:49 -0000
-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yahoo.com; s=s1024; t=1243834249; bh=R8qzdi/IbLyO8UwpnaujDpT9E+6bJ7nkmZN2803EmRk=; h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; b=vq4c6RIDbkuLPYd8mirusIXf6DqTb/IeT55In7W00Y5Sxx1ZiXBb78yE9+TDfXJ0elsEZvqv4ocyvolGE0eGtyYeJA0mZikpRNu6pidxPNpCplOcLHBRz7YQ7iERwv3TagRlWy2Xd3oD9ZeV0A05P7WUOiNNX1PUUJD1IVdrEZo=
-DomainKey-Signature:a=rsa-sha1; q=dns; c=nofws;
- s=s1024; d=yahoo.com;
- h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type;
- b=6HXZV98ON5vBwmE/xS8stVD0D2F4dkMY7a0suX5KVTb736JdR8G59mqBq/dWcpbFTLiCLtxi18LMb/dU1RKRGOEdn3l3j/jKXhBrhIgfg3qtNskPedXDKBvn7JGXiSkqpA/tUtPjvc0Uuk8/LaA01SQTz40Engg7nD8/EJdIAhA=;
-Message-ID: <59...@web111010.mail.gq1.yahoo.com>
-X-YMail-OSG: KzhhrJYVM1m.MCS6vRpRP2ZZO2PrfnbngosELDCIa91ZqvhJph4RdmzfUW0jw9W04RCSch1K730bPohwNpNBIk2QR_zt4_mfbhfq7YEPkSoz9LSXG90P9vIo5Fc8qyZN0U6vA9gtdyGQTpN5ahvillUH9nAF0TMWv2SvZJLjPlQ0Z0p8oK8ltBwGTgLrM8Jtdn9D29yoRyi3_EpVOfdD9OP.EK50Vr1XwSUYMbnpZ0WGHMwd.Yig7A6Elwadm3YVbfOdx2mfrG.jQsUAxQjRBNvbrOM57.FaE11kHTe9aoBWSeihNg--
-Received: from [216.145.54.7] by web111010.mail.gq1.yahoo.com via HTTP; Sun, 31 May 2009 22:30:49 PDT
-X-Mailer: YahooMailRC/1277.43 YahooMailWebService/0.7.289.10
-References: <C6...@yahoo-inc.com>
-Date: Sun, 31 May 2009 22:30:49 -0700 (PDT)
-From: Jianmin Woo <ji...@yahoo.com>
-Subject: Re: question about when shuffle/sort start working
-To: core-user@hadoop.apache.org
-In-Reply-To: <C6...@yahoo-inc.com>
-MIME-Version: 1.0
-Content-Type: multipart/alternative; boundary="0-1193839393-1243834249=:35091"
-X-Virus-Checked: Checked by ClamAV on apache.org
-
---0-1193839393-1243834249=:35091
-Content-Type: text/plain; charset=us-ascii
-
-Thanks a lot for your explanation, Jothi.
-
-So is this event generated by hadoop framework? Is there any API in mapper to fire this event? Actually, I am thinking to implement a mapper that will emit some <key, value> pairs, then fire this event to let the reducer works, the same mapper task then emit some other <key, value> pairs and repeat. Do you think is this logic feasible by current API?
-
-Thanks,
-Jianmin
-
-
-
-
-
-________________________________
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: core-user@hadoop.apache.org
-Sent: Monday, June 1, 2009 12:26:31 PM
-Subject: Re: question about when shuffle/sort start working
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Jothi
-
-
-On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Hi,
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
->
-> Thanks,
-> Jianmin
->
->
->
->
-
-
-
---0-1193839393-1243834249=:35091--
-
-
-From core-user-return-14702-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 06:04:30 2009
-Return-Path: <co...@hadoop.apache.org>
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 53387 invoked from network); 1 Jun 2009 06:04:29 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 06:04:29 -0000
-Received: (qmail 39066 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 38970 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help: <ma...@hadoop.apache.org>
-List-Unsubscribe: <ma...@hadoop.apache.org>
-List-Post: <ma...@hadoop.apache.org>
-List-Id: <core-user.hadoop.apache.org>
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 38955 invoked by uid 99); 1 Jun 2009 06:04:39 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:39 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
- tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [216.145.54.172] (HELO mrout2.yahoo.com) (216.145.54.172)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:28 +0000
-Received: from SNV-EXBH01.ds.corp.yahoo.com (snv-exbh01.ds.corp.yahoo.com [207.126.227.249])
- by mrout2.yahoo.com (8.13.6/8.13.6/y.out) with ESMTP id n5163FGq038852
- for <co...@hadoop.apache.org>; Sun, 31 May 2009 23:03:15 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=yahoo-inc.com; c=nofws; q=dns;
- h=received:user-agent:date:subject:from:to:message-id:
- thread-topic:thread-index:in-reply-to:mime-version:content-type:
- content-transfer-encoding:x-originalarrivaltime;
- b=rChE4SCnwtWaZpjhovkiXDKfDiVNdRRvsadSGG9S9bgvOexn/9/5JjEQx1pOR7Nb
-Received: from SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.9]) by SNV-EXBH01.ds.corp.yahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
- Sun, 31 May 2009 23:03:15 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.yahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon, 1 Jun 2009 06:03:15 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 11:33:13 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-To: <co...@hadoop.apache.org>
-Message-ID: <C6...@yahoo-inc.com>
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnifqWrLG6N7GAk7kqy9QalVWfegQ==
-In-Reply-To: <59...@web111010.mail.gq1.yahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
- charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 06:03:15.0462 (UTC) FILETIME=[A7231260:01C9E27E]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-
-No you cannot raise this event yourself, this event is generated internally
-by the framework.
-
-I am guessing that what you probably want is to have a chain of MapReduce
-Jobs where the output of one is automatically fed as input to another. You
-can look at these classes: JobControl and ChainMapper/ChainReducer.
-
-Jothi
-
-On 6/1/09 11:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
-
-> Thanks a lot for your explanation, Jothi.
->
-> So is this event generated by hadoop framework? Is there any API in mapper to
-> fire this event? Actually, I am thinking to implement a mapper that will emit
-> some <key, value> pairs, then fire this event to let the reducer works, the
-> same mapper task then emit some other <key, value> pairs and repeat. Do you
-> think is this logic feasible by current API?
->
-> Thanks,
-> Jianmin
->
->
->
->
->
-> ________________________________
-> From: Jothi Padmanabhan <jo...@yahoo-inc.com>
-> To: core-user@hadoop.apache.org
-> Sent: Monday, June 1, 2009 12:26:31 PM
-> Subject: Re: question about when shuffle/sort start working
->
-> When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-> fetch map outputs for a given map only on the receipt of such events.
->
-> Jothi
->
->
-> On 5/30/09 10:00 AM, "Jianmin Woo" <ji...@yahoo.com> wrote:
->
->> Hi,
->> I am being confused by the protocol between mapper and reducer. When mapper
->> emitting the (key,value) pair done, is there any signal the mapper send out
->> to
->> hadoop framework in protocol to indicate that map is done and the
->> shuffle/sort
->> can begin for reducer? If there is no this signal in protocol, when the
->> framework begin the shuffle/sort?
->>
->> Thanks,
->> Jianmin
->>
->>
->>
->>
->
->
->
-
-
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2 b/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2
deleted file mode 100644
index be2cb87..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/egyl03.gdas.200811.00Z.grb2 and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/english.cp500.txt b/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
deleted file mode 100644
index 734aab6..0000000
--- a/tika-test-resources/src/test/resources/test-documents/english.cp500.txt
+++ /dev/null
@@ -1 +0,0 @@
-@@@@@@@@@ɕ�����⣁����@�����@
%
%@@@@@@@@@@@@ȁ������@
%
%@@@@@@@@@@@@Ǚ������@
%
%@@@@@@@@@@@@⤗����@���@��������@
%
%@@@@@@@@@@@@م�������@
%
%
%
%
%@@@@@@@@@@@@م�����@�����
%@@@@@@@@@@@@恙�������k@��������@���@�����������
%@@@@@@@@@@@@ז���@⨢����
%@@@@@@@@@@@@⨢����@�
%@@@@@@@@@@@@���@⨢���@�@م������
%@@@@@@@@@@@@━��@��������@��������@������
%@@@@@@@@@@@@ŧ�����@���������@���@������@��������
%@@@@@@@@@@@@¤������@ׁ������
%
%
%@@@@@@@@@@@@@���@⨢����@@n@料���������@@n@ɕ�����⣁����@�����@@n@
%@@@@@@@@@@@@@@@@@@ȁ������@@n@@
%
%@@@@@@@@@@@@@@@@@
@���@ɕ�����⣁����@�����@���@ŧ�����@@
%@@@@@@@@@@@@@@@@@@@@@@@@י�����@�����������@P@��������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@֥������@@@j@@@ƅ������@P@
������@@@@j@@@◅�����������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@j@@@ׅ���������@ā��@@@@j@@
%
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@Ӆ���@����
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@���@ɕ�����⣁����@�����@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@M����]
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ㅃ������@��������@���@������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ɓ���@P@��������@������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@⨢���@�����������@�
������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ȁ������@�������������
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@⤗����
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@������`�����@��a������@���@ť��������@M�����]
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Dž�@�����V@م����V
%
%
%@@@@@@@@@@@@@@@@@@㈅@���@ɕ�����⣁����@ז���@���@���@���@ɕ�����⣁����@ז���@
%@@@@@@@@@@@@@@@@@@���@����@����@���������@��@�@��������@����������@����@
%@@@@@@@@@@@@@@@@@@���������@����@��@���������@����@��@с�����@�k@����K@֕@��@
%@@@@@@@@@@@@@@@@@@�����@���@���������@����@��@����������k@��
�@���@��@������@
%@@@@@@@@@@@@@@@@@@�����@�����@��������@��������@����@���K@@薤@���@������@���@
%@@@@@@@@@@@@@@@@@@��������@��@��@��`���������@�����@�������@���@¤������@
%@@@@@@@@@@@@@@@@@@ׁ������K
%@@@@@@@@@@@@@@@@@@@@@@@@ɕ���������@��@�����@���@料��������@���������
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@ȉ��������
%
%
%@@@@@@@@@@@@@@@@@@@@⤗����@���@����@��`���@���@��`���@����@��@��������@
%@@@@@@@@@@@@@@@@@@@@������������@
%@@@@@@@@@@@@@@@@@@@@ؤ���@���������@��@�@��������@�������@
%@@@@@@@@@@@@@@@@@@@@Ł��@������
������@�������@
%@@@@@@@@@@@@@@@@@@@@ą������@���@��������@�����������@���@��������@�������@
%@@@@@@@@@@@@@@@@@@@@������������@
%@@@@@@@@@@@@@@@@@@㈅@���@���@ɕ�����⣁����V@�����9@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@��������@��@����������@���@�����@��������@�������@����@������@
%@@@@@@@@@@@@@@@@@@��������@���@���������@�����������@���@�@�����@����@
%@@@@@@@@@@@@@@@@@@����������K@䢅@��@���@����`���@ԅ��������@Ö������@�����@
%@@@@@@@@@@@@@@@@@@ą����@M����]k@Ö����
��@�����@��������@M���]k@��@�������@
%@@@@@@@@@@@@@@@@@@����������@���@�����@��������@���@���������@������������K@扣�@
%@@@@@@@@@@@@@@@@@@��@��������@������������k@��@��@��@���������@�����������@���@
%@@@@@@@@@@@@@@@@@@��������@�����������@���@�������
%
%@@@@@@@@@@@@@@@@@@扣�@�������`����@���@ז�����V@���@���������@����������@���@
%@@@@@@@@@@@@@@@@@@���@����元9k@���@����@����������@��`���@���������@
%@@@@@@@@@@@@@@@@@@���������������@ɕ�����⣁����@��
���@���@ŧ�����@�����������@
%@@@@@@@@@@@@@@@@@@������@�����������@�����a�����������@��������K@ɣ@��@���@
%@@@@@@@@@@@@@@@@@@�������@����������@��@���@���@ɕ�����⣁����@�����@���@
%@@@@@@@@@@@@@@@@@@ŧ�������������@�@������@��@�����������@���@�����K@���@
%@@@@@@@@@@@@@@@@@@�������@������@���@���@��9@��@Ӊ���V@���������@������@M��]k@
%@@@@@@@@@@@@@@@@@@���@��@����@�������O
%
%@@@@@@@@@@@@@@@@@@ɕ�����⣁����@�����@������������@���@���@�����@�����������@
%@@@@@@@@@@@@@@
@@@@������@������������@����������@��`���@�����@��@���@�����@
%@@@@@@@@@@@@@@@@@@�����������@����������K@Ɩ�@�����@����@���������k@���@�����@
%@@@@@@@@@@@@@@@@@@���@ŧ�����@��������@������@�����������@����@���@�����������@@
%@@@@@@@@@@@@@@@@@@�@k@���@���@ɕ�����⣁����@�����@���@�K�@�ȩk@�����@����������@
%@@@@@@@@@@@@@@@@@@����@����@�����K@¨@��������@���@�����@�����k@���@�����@���@
%@@@@@@@@@@@@@@@@@@ŧ�����@������@���@������@��@�������@���k@������
�@����@������@
%@@@@@@@@@@@@@@@@@@���@��������@������������K
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����@��������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@ą������@�����������@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@�`@��@�`����@���@������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��`���@ז�����@����������@����@����元9@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@����@��@��@��������@�������@��@��@��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@������������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@g@⤗������@��@���@��9@M��K�@��@��K�]@���@Ӊ���@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�������������@����@م�@ȁ�@M����@��@�]@���@����@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ӊ���@M����@�]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@ȁ������@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@ą������@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@֕�@��@���@��`���@�K�@�ȩ@ז�����@���@����������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@���@��@�����@���@���������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@�����@��@���@��@���@�ȩ@���@��
�����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ɩ��@���`�@�������@�����@M���@�ȩ@M��`���]^@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@����@�ȩ@M��`���]]^@֕�@���@����@M��@�ȩ@
%M��`���]]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@㈙��@����@����@����@���@��@��@�K���@��@��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ĥ��@�������@䓣�����@����@����������@M��������@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@���@��������]
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ĥ��@������@ţ������@��a���a����@Ԃ��@����������
%@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@Ɩ��@���k@���@������@�����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@㦖@����`����@�����@����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�������@����@��������@���@��������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�����@�������◁����V@���@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@◁��Ԗ���V
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@g@��@��������@�����@�������
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@
%
%
%
%@@@@@@@@@@@@@@@@@@@@@�@���@��@���������@���������@�����@��@�
��@ƅ������@��k@
%@@@@@@@@@@@@@@@@@@����@���������@��@⣁�����@ׅ���������@ť��������@Ö���������@
%@@@@@@@@@@@@@@@@@@���@��@��@���������@��@���K����K���K
%@@@@@@@@@@@@@@@@@@@�}��@����@��@����
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ł��@����@��@���@���@�������@���@����K
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@��@���
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@م�����@�@�����
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@��@����@��@��
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@�`���`����`���
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@ؤ�����@�������
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ӆ���@����
%
%
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@㉔�������@��������
�
%
%@@@@@@@@@@@@@@@@@@@@@@@@����@��������@��������@��@����@���@�������
%
%
%@@@@@@@@@@@@@@@@@@@@@@@@剢��@剢��Á��
%
%
%
%
%@@�`����@����@����י���@����@����ĉ��@�����@��@���K����K�������@���@י�����@
%@@Ö�����@ㅙ��@��@���@���@ƅ���
%
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr b/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
deleted file mode 100644
index 37d73de..0000000
--- a/tika-test-resources/src/test/resources/test-documents/envi_test_header.hdr
+++ /dev/null
@@ -1,16 +0,0 @@
-ENVI
-description = {
- GEO-TIFF File Imported into ENVI [Fri May 25 14:06:23 2012]}
-samples = 2400
-lines = 2400
-bands = 7
-header offset = 0
-file type = ENVI Standard
-data type = 2
-interleave = bip
-sensor type = Unknown
-byte order = 0
-map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}
-projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}
-coordinate system string = {PROJCS["Sinusoidal",GEOGCS["GCS_ELLIPSE_BASED_1",DATUM["D_ELLIPSE_BASED_1",SPHEROID["S_ELLIPSE_BASED_1",6371007.181,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],UNIT["Meter",1.0]]}
-wavelength units = Unknown
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/footnotes.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/footnotes.docx b/tika-test-resources/src/test/resources/test-documents/footnotes.docx
deleted file mode 100644
index db4386c..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/footnotes.docx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2 b/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2
deleted file mode 100644
index 7ab3416..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/gdas1.forecmwf.2014062612.grib2 and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/headerPic.docx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/headerPic.docx b/tika-test-resources/src/test/resources/test-documents/headerPic.docx
deleted file mode 100644
index 01072e5..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/headerPic.docx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/headers.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/headers.mbox b/tika-test-resources/src/test/resources/test-documents/headers.mbox
deleted file mode 100644
index d607bc9..0000000
--- a/tika-test-resources/src/test/resources/test-documents/headers.mbox
+++ /dev/null
@@ -1,7 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-Return-Path: <na...@domain.com>
-Subject: subject
-From: <au...@domain.com>
-Date: Tue, 9 Jun 2009 23:58:45 -0400
-
-Test content
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/jxl.xls
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/jxl.xls b/tika-test-resources/src/test/resources/test-documents/jxl.xls
deleted file mode 100644
index 569e21d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/jxl.xls and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/moby.zip
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/moby.zip b/tika-test-resources/src/test/resources/test-documents/moby.zip
deleted file mode 100644
index 71dac43..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/moby.zip and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml b/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
deleted file mode 100644
index 38a7731..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/embedded_then_npe.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
-
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">main_content</write>
- <!-- auto detection wasn't working for some reason; add content-type as
- is to trigger mock on the embedded -->
- <embedded filename="embed1.xml" content-type="application/mock+xml">
- <mock>
- <metadata action="add" name="author">embeddedAuthor</metadata>
- <write element="p">some_embedded_content</write>
- </mock>
- </embedded>
- <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/example.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/example.xml b/tika-test-resources/src/test/resources/test-documents/mock/example.xml
deleted file mode 100644
index df1a762..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/example.xml
+++ /dev/null
@@ -1,51 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <!-- this file offers all of the options as documentation
- Parsing should stop at an IOException, of course
- -->
-
- <!-- action can be "add" or "set" -->
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <!-- element is the name of the sax event to write, p=paragraph
- if the element is not specified, the default is <p> -->
- <write element="p">some content</write>
- <!-- write something to System.out -->
- <print_out>writing to System.out</print_out>
- <!-- write something to System.err -->
- <print_err>writing to System.err</print_err>
- <!-- hang
- millis: how many milliseconds to pause. The actual hang time will probably
- be a bit longer than the value specified. heavy: whether or not the hang should do something computationally expensive.
- If the value is false, this just does a Thread.sleep(millis).
- This attribute is optional, with default of heavy=false.
- pulse_millis: (required if "heavy" is true), how often to check to see
- whether the thread was interrupted or that the total hang time exceeded the millis
- interruptible: whether or not the parser will check to see if its thread
- has been interrupted; this attribute is optional with default of true
- -->
- <hang millis="100" heavy="true" pulse_millis="10" interruptible="true" />
- <!-- throw an exception or error; optionally include a message or not -->
- <throw class="java.io.IOException">not another IOException</throw>
- <!-- perform a genuine OutOfMemoryError -->
- <oom/>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml b/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
deleted file mode 100644
index 6f090d4..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/fake_oom.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <throw class="java.lang.OutOfMemoryError">not another oom</throw>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml b/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
deleted file mode 100644
index df5bbfd..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/heavy_hang.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="3000" heavy="true" pulse_millis="100" />
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml b/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
deleted file mode 100644
index e3656a8..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/nothing_bad.xml
+++ /dev/null
@@ -1,26 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Geoffrey Chaucer</metadata>
- <write element="p">Whan that Aprille with his shoures soote</write>
- <write>The droghte of Marche hath perced to the roote,</write>
- <write>And bathed every veyne in swich licour,</write>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
deleted file mode 100644
index 4561c3a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml b/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
deleted file mode 100644
index 33f3f83..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <throw class="java.lang.NullPointerException"/>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml b/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
deleted file mode 100644
index 168751a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/real_oom.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <oom/>
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
deleted file mode 100644
index 991cdc2..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="3000" heavy="false" />
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
deleted file mode 100644
index 8d84ead..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep_interruptible.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="3000" heavy="false" interruptible="true" />
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml b/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
deleted file mode 100644
index 7994095..0000000
--- a/tika-test-resources/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="3000" heavy="false" interruptible="false" />
-</mock>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/multiline.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/multiline.mbox b/tika-test-resources/src/test/resources/test-documents/multiline.mbox
deleted file mode 100644
index fff7dfb..0000000
--- a/tika-test-resources/src/test/resources/test-documents/multiline.mbox
+++ /dev/null
@@ -1,5 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-Received: from xxx
- by xxx with xxx; date
-
-Test content
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/pictures.ppt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/pictures.ppt b/tika-test-resources/src/test/resources/test-documents/pictures.ppt
deleted file mode 100644
index 9f6ce6d..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/pictures.ppt and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protect.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protect.xlsx b/tika-test-resources/src/test/resources/test-documents/protect.xlsx
deleted file mode 100644
index 1767b14..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protect.xlsx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx b/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx
deleted file mode 100644
index 1767b14..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protectedFile.xlsx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx b/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx
deleted file mode 100644
index 09f6a77..0000000
Binary files a/tika-test-resources/src/test/resources/test-documents/protectedSheets.xlsx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/quoted.mbox
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/quoted.mbox b/tika-test-resources/src/test/resources/test-documents/quoted.mbox
deleted file mode 100644
index 05d0fd8..0000000
--- a/tika-test-resources/src/test/resources/test-documents/quoted.mbox
+++ /dev/null
@@ -1,4 +0,0 @@
-From envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009
-
-Test content
-> quoted stuff
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/resume.html
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/resume.html b/tika-test-resources/src/test/resources/test-documents/resume.html
deleted file mode 100644
index 1bd747a..0000000
--- a/tika-test-resources/src/test/resources/test-documents/resume.html
+++ /dev/null
@@ -1,73 +0,0 @@
-
-
- <div class="js-helper">
- <style type="text/css">#style_13209008630000000884_BODY{background-color:#FFFFFF;color:#000000;MARGIN:0px 1px;font-family:Tahoma,Arial,Verdana,Sans-Serif}#style_13209008630000000884 TD{font-size:13px;font-family:Tahoma,Arial,Verdana,Sans-Serif;vertical-align:top}#style_13209008630000000884 CAPTION{font-size:13px;font-weight:bold;text-align:left}#style_13209008630000000884 TR.style_13209008630000000884thead TD{font-weight:bold;text-align:center; padding-bottom:6px;padding-top:6px;padding-left:2px;padding-right:2px}#style_13209008630000000884 H1{font-size:24px;margin-bottom:15px;margin-top:5px;display:block;font-weight:normal;}#style_13209008630000000884 H2{font-size:22px;margin-bottom:5px;margin-top:5px;display:block;font-weight:normal;letter-spacing:1px}#style_13209008630000000884 H1.style_13209008630000000884in, #style_13209008630000000884 H2.style_13209008630000000884in, #style_13209008630000000884 H3.style_13209008630000000884in{font-size:100%;margin-bottom:0px;margin-top:0px;di
splay:inline;}#style_13209008630000000884 A, #style_13209008630000000884 A.style_13209008630000000884notvisited:visited, #style_13209008630000000884 .style_13209008630000000884notvisited A:visited, #style_13209008630000000884 .style_13209008630000000884menu A:visited{color:#00418F;text-decoration:none}#style_13209008630000000884 A:visited{color:#6699CC;text-decoration:none;}#style_13209008630000000884 A:hover, #style_13209008630000000884 A.style_13209008630000000884notvisited:hover, #style_13209008630000000884 .style_13209008630000000884notvisited A:hover, #style_13209008630000000884 .style_13209008630000000884menu A:hover{color:#990000;text-decoration:underline}#style_13209008630000000884 .style_13209008630000000884bold, #style_13209008630000000884 .style_13209008630000000884bold H1{font-weight:bold}#style_13209008630000000884 .style_13209008630000000884u{text-decoration:underline}#style_13209008630000000884 .style_13209008630000000884gray, #style_13209008630000000884 A.style_13209
008630000000884gray:visited, #style_13209008630000000884 LEGEND{color:#7A7A7A}#style_13209008630000000884 .style_13209008630000000884red, #style_13209008630000000884 A.style_13209008630000000884red:visited{color:#C2311A}#style_13209008630000000884 EM, #style_13209008630000000884 .style_13209008630000000884imp, #style_13209008630000000884 .style_13209008630000000884field_warning{color:#C2311A;font-weight:bold;font-style:normal}#style_13209008630000000884 TABLE.style_13209008630000000884bl_table TR TD{padding:2px; padding-left:10px}#style_13209008630000000884 TD.style_13209008630000000884bl_row_name{color:#555; width:10%}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD, #style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD, #style_13209008630000000884 TD.style_13209008630000000884serverdark, #style_1320900863
0000000884 TR.style_13209008630000000884serverdark TD{text-align:center;padding-bottom:3px;padding-top:3px;padding-left:1px;padding-right:1px;font-weight:bold}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A:visited, #style_13209008630000000884 TD.style_13209008630000000884vacancydark A:hover, #style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD, #style_13209008630000000884 TD.style_13209008630000000884resumedark A, #style_13209008630000000884 TD.style_13209008630000000884resumedark A:visited, #style_13209008630000000884 TD.style_13209008630000000884resumedark A:hover, #style_13209008630000000884 TD.style_13209008630000000884serverdark, #style_13209008630000000884
TR.style_13209008630000000884serverdark TD, #style_13209008630000000884 TD.style_13209008630000000884serverdark A, #style_13209008630000000884 TD.style_13209008630000000884serverdark A:visited, #style_13209008630000000884 TD.style_13209008630000000884serverdark A:hover{color:#000000;}#style_13209008630000000884 TD.style_13209008630000000884vacancydark, #style_13209008630000000884 TR.style_13209008630000000884vacancydark TD{background-color:#FFDDBB;}#style_13209008630000000884 TD.style_13209008630000000884vacancylight, #style_13209008630000000884 TR.style_13209008630000000884vacancylight TD{background-color:#FFF5EC}#style_13209008630000000884 TD.style_13209008630000000884resumedark, #style_13209008630000000884 TR.style_13209008630000000884resumedark TD{background-color:#D3E9E9;}#style_13209008630000000884 TD.style_13209008630000000884resumelight, #style_13209008630000000884 TR.style_13209008630000000884resumelight TD{background-color:#ECF8F7}#style_13209008630000000884 TD.style_13209
008630000000884serverdark, #style_13209008630000000884 TR.style_13209008630000000884serverdark TD{background-color:#ABC2D5;}#style_13209008630000000884 TR.style_13209008630000000884serverlight TD, #style_13209008630000000884 TD.style_13209008630000000884serverlight{background-color:#E2EBF5}#style_13209008630000000884 TD.style_13209008630000000884blankheader1{font-size:24px; padding:10px}#style_13209008630000000884 TD.style_13209008630000000884blankheader2{font-size:22px; padding:10px}#style_13209008630000000884 TABLE.style_13209008630000000884resumelist TR.thead TD{background-color:#ABC2D5;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist TR.thead TD{background-color:#ABC2D5;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TR.thead TD{background-color:#DBDBDB;}#style_13209008630000000884 TABLE TR.style_13209008630000000884wr TD{background-color:#FFFFFF}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD{bord
er-bottom:#DBDBDB 1px solid}#style_13209008630000000884 .style_13209008630000000884list TR TD{background-color:#E2EBF5;padding:5px}#style_13209008630000000884 .style_13209008630000000884list TR.thead TD{background-color:#ABC2D5;color:#555555;text-align:center; padding-bottom:8px;padding-top:8px;padding-left:1px;padding-right:1px;font-weight:bold;}#style_13209008630000000884 .style_13209008630000000884list TR.wr TD{background-color:#F3F7FB}#style_13209008630000000884 A.style_13209008630000000884list_details, #style_13209008630000000884 A.style_13209008630000000884list_details:visited, #style_13209008630000000884 A.style_13209008630000000884list_details:hover{color:#7A7A7A;text-decoration:none;line-height:120%}#style_13209008630000000884 TD.style_13209008630000000884cell, #style_13209008630000000884 TD.style_13209008630000000884c{padding-top:3px;padding-left:5px;padding-right:5px}#style_13209008630000000884 BIG{font-size:24px}#style_13209008630000000884 .style_13209008630000000884smal
l, #style_13209008630000000884 SMALL{font-size:85%}#style_13209008630000000884 UL{margin-left:25px;margin-bottom:0px}#style_13209008630000000884 TD.style_13209008630000000884small, #style_13209008630000000884 .style_13209008630000000884verysmall, #style_13209008630000000884 .style_13209008630000000884verysmall INPUT, #style_13209008630000000884 .style_13209008630000000884verysmall SELECT{font-size:11px}#style_13209008630000000884 DIV.style_13209008630000000884localmenu{padding-top:10px;margin-bottom:15px;}#style_13209008630000000884 DIV.style_13209008630000000884localmenu A, #style_13209008630000000884 DIV.style_13209008630000000884localmenu A:visited{text-decoration:underline;font-weight:bold}#style_13209008630000000884 DIV.style_13209008630000000884comment{font-size:85%; background-color:#DDFFDD; padding:4px; border:1px solid #CCC;cursor:default;}#style_13209008630000000884 HR{color:#ABC2D5;background-color:#ABC2D5;height:1px;border:0px solid #ABC2D5}#style_13209008630000000884 DI
V.style_13209008630000000884dotsline{font-size:1px; margin-top:4px; margin-bottom:5px; border-bottom:#BACBD7 1px dotted}#style_13209008630000000884 TABLE.style_13209008630000000884rctable TR TD{background-color:#E5EDF7;}#style_13209008630000000884 TD.style_13209008630000000884rc1{padding-top:10px; padding-left:10px;}#style_13209008630000000884 TD.style_13209008630000000884rc2{font-size:1px; width:10px;}#style_13209008630000000884 TD.style_13209008630000000884rc3{height:10px; font-size:1px;}#style_13209008630000000884 TD.style_13209008630000000884rc4{height:10px; font-size:1px;}#style_13209008630000000884 SPAN.style_13209008630000000884super{color:#003398;font-size:150%}#style_13209008630000000884 SPAN.style_13209008630000000884job{color:#FF0000;font-size:150%}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button{background-color:#99cc00; margin:0px 5px 3px 0px;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail
TD TABLE.to_site_button TD{background-color:#99cc00; font-weight:normal; color:#ffffff; border-bottom:0px; padding-top:6px; padding-right:7px; padding-bottom:6px; padding-left:7px; vertical-align:middle; text-align:center;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button TD A, #style_13209008630000000884 TABLE.style_13209008630000000884to_site_button TD A:visited{color:#ffffff; text-decoration:none; font-weight:normal;}#style_13209008630000000884 TABLE.style_13209008630000000884vaclist_for_mail TD TABLE.to_site_button TD A:hover{color:#ffffff; text-decoration:underline; font-weight:normal;}#style_13209008630000000884 .style_13209008630000000884row{clear:left; padding-bottom:4px;}#style_13209008630000000884 .style_13209008630000000884row2{margin-bottom:8px;}#style_13209008630000000884 .style_13209008630000000884col1{float:left; width:140px; color:#555555; margin-right:-145px;}#style_13209008630000000884 .style_13209008630000000884c
ol2{margin-left:145px;}#style_13209008630000000884 DIV.style_13209008630000000884resume_rightcol{float:right; width:280px; margin:0px 0px 10px 30px;}#style_13209008630000000884 DIV.style_13209008630000000884blankheader1{font-size:190%;}
-</style>
- <div id="style_13209008630000000884" class="mr_read__body">
- <base target="_self" href="http://e.mail.ru/cgi-bin/" />
-
- <div id="style_13209008630000000884_BODY">
-
-
-
-<style type="text/css" ></style>
-
-
-<table width="100%" cellspacing="0" cellpadding="0" height="100%" border="0" >
-<tr ><td >
-
-</td></tr>
-<tr ><td style="padding:5px" height="100%" >
-Здравствуйте, !<br >
-<br >
-Предлагаем Вам ознакомиться со списком зарегистрированных компаний, представители которых просмотрели Ваше резюме за последние сутки.<br >
-<br >
-<li ><a target="_blank" href="/cgi-bin/link?check=1&cnf=710139&url=http%3A%2F%2;0,0" >Компании, просмотревшие резюме № .</a> Новые: <b >1.</b></li><br >
-<br >
-Эти сведения предоставляются Вам исключительно для информации. Вы можете оперативно отслеживать, какие именно компании нашли в базе данных Superjob Ваше резюме и заинтересовались им.<br >
-<br >
-Если Ваше резюме размещено в закрытом доступе, то его могут просматривать только те работодатели, которым Вы отправили его самостоятельно.<br >
-Историю отправки своего резюме Вы можете посмотреть по ссылке «История рассылки резюме».<br >
-<br >
-<br >
-<b >Внимание!</b><br >
-В процессе поиска работы Вы можете столкнуться с такими предложениями работодателей или кадровых агентств, в которых Вас будут просить внести оплату (за предварительное обучение, за оформление документов, за оформление обязательной страховки, на закупку первой партии продукции компании, предназначенной для продажи и т.п.) или предоставить отсканированные копии документов (паспорта, военного билета, трудовой книжки, водительских прав, пенсионного удостоверния и т.п.) для якобы предварительного оформления или подтверждения данных, указа
нных в Вашем резюме.<br >
-Это один из признаков мошенничества! Мы рекомендуем Вам очень осторожно относиться к таким предложениям и по возможности избегать собеседований с подобными работодателями.<br >
-<br >
-Также мы настоятельно не рекомендуем отправлять платные SMS-сообщения на короткие номера для получения контактов или другой информации о вакансии или же для получения результатов тестирования. С организациями, которые оказывают подобные услуги, мы не сотрудничаем и предупреждаем, что это тоже один из приемов мошенничества.<br >
-<br >
-<br >
-<em >x</em> <a target="_blank" href="/cgi-bin/link?check=1&cnf=8d972a&url=http%3A%2F%2Fwww.sup;0,0" >Отключить уведомления о новых просмотрах моих резюме</a><br >
-<br >
-По ссылкам в этом письме можно войти в систему без ввода пароля.
-<br ><br >
-</td>
-</tr>
-<tr >
-<td >
-<span class="style_13209008630000000884noprint" ><br ><br >Если у Вас есть пожелания и идеи по улучшению сервиса Superjob, пожалуйста, <a target="_blank" href="/cgi-bin/link?check=1;0,0" >напишите нам</a>.<br ><br ></span>
-<table width="100%" cellspacing="0" cellpadding="10" border="0" class="style_13209008630000000884noprint" >
-<tr ><td align="center" style="border-top:1px solid #BACBD7;" >
-<a target="_blank" href="/cgi-bin/link?check=1&cnf=8fa2f9&url=http%3A%2F%2Fwww.;0,0" ><big >Superjob — Работа должна доставлять удовольствие!</big></a>
-</td></tr>
-</table>
-<table width="100%" cellspacing="1" cellpadding="0" border="0" class="style_13209008630000000884noprint" >
-<tr ><td align="center" style="padding:5px" >
-<span style="color:#999999;font-size:8pt;" >Письмо отправлено: xx.xx.xxxx xx:xx:xx</span>
-</td></tr>
-</table>
-
-</td></tr>
-</table>
-
-
-
-</div>
-
-
- <base target="_self" href="http://e.mail.ru/cgi-bin/" />
- </div>
-</div>
-
-
-
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/rsstest.rss
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/rsstest.rss b/tika-test-resources/src/test/resources/test-documents/rsstest.rss
deleted file mode 100644
index 758f6a1..0000000
--- a/tika-test-resources/src/test/resources/test-documents/rsstest.rss
+++ /dev/null
@@ -1,36 +0,0 @@
-<?xml version="1.0" encoding="ISO-8859-1" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<rss version="0.91">
- <channel>
- <title>TestChannel</title>
- <link>http://test.channel.com/</link>
- <description>Sample RSS File for Junit test</description>
- <language>en-us</language>
-
- <item>
- <title>Home Page of Chris Mattmann</title>
- <link>http://www-scf.usc.edu/~mattmann/</link>
- <description>Chris Mattmann's home page</description>
- </item>
- <item>
- <title>Awesome Open Source Search Engine</title>
- <link>http://www.nutch.org/</link>
- <description>Yup, that's what it is</description>
- </item>
- </channel>
-</rss>
http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt b/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
deleted file mode 100644
index 050cc87..0000000
--- a/tika-test-resources/src/test/resources/test-documents/russian.cp866.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-�������, � ��㤥��� ������ ����,
-� �� ���� ��襫; �� ᨫ�� ��.
-����, ����������� �������� � ����
-��蠤��, ������ 墮���� ���.
-�, ����� �����, � ᯮ����⢨� 稭���,
-��蠤�� ����� ��� 㧤�� �㦨箪