You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ed...@apache.org on 2005/07/03 23:39:56 UTC

svn commit: r208978 - in /incubator/jackrabbit/trunk/contrib/textfilters: ./ src/ src/java/ src/java/META-INF/ src/java/META-INF/services/ src/java/org/ src/java/org/apache/ src/java/org/apache/jackrabbit/ src/java/org/apache/jackrabbit/core/ src/java/...

Author: edgarpoce
Date: Sun Jul  3 14:39:55 2005
New Revision: 208978

URL: http://svn.apache.org/viewcvs?rev=208978&view=rev
Log:
Added textfilters contribution, thanks Ján Halasa!

Added:
    incubator/jackrabbit/trunk/contrib/textfilters/
    incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/README.txt   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/project.properties   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/project.xml
    incubator/jackrabbit/trunk/contrib/textfilters/src/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java   (with props)
    incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java   (with props)

Added: incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt Sun Jul  3 14:39:55 2005
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
\ No newline at end of file

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt Sun Jul  3 14:39:55 2005
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/README.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/README.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/README.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/README.txt Sun Jul  3 14:39:55 2005
@@ -0,0 +1,20 @@
+TextFilters allow Jackrabbit to extract text from binary
+properties for indexing purposes.
+
+This project contains TextFilter implementations for the 
+following binary formats:
+
+1. MsExcel
+2. MsPowerPoint
+3. MsWord
+4. Pdf
+
+How to register the filters in Jackrabbit?
+Build the jar file and place it in the Jackrabbit 
+classpath. The filters will be automatically loaded 
+on startup.
+
+For further information, see the javadocs for:
+org.apache.jackrabbit.core.query.TextFilter
+org.apache.jackrabbit.core.query.TextFilterService
+

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/README.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml Sun Jul  3 14:39:55 2005
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE suppressions PUBLIC
+    "-//Puppy Crawl//DTD Suppressions 1.0//EN"
+    "http://www.puppycrawl.com/dtds/suppressions_1_0.dtd">
+
+<suppressions>
+    <!-- 
+      Suppressions for the generated JCRSQL parser
+    -->
+    <suppress checks=".*" files="(JJT)?JCRSQL.*.java"/>
+    <!-- Suppressions for the generated XPath parser -->
+    <suppress checks=".*" files="(JJT)?XPath.*.java"/>
+</suppressions>

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml Sun Jul  3 14:39:55 2005
@@ -0,0 +1,171 @@
+<?xml version="1.0"?>
+<!--
+   Copyright 2004-2005 The Apache Software Foundation or its licensors,
+                       as applicable.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+  
+<!DOCTYPE module PUBLIC
+    "-//Puppy Crawl//DTD Check Configuration 1.1//EN"
+    "http://www.puppycrawl.com/dtds/configuration_1_1.dtd">
+
+<!--
+  Checkstyle checks configured for Maven.
+-->
+
+<module name="Checker">
+
+    <!-- Checks that a package.html file exists for each package.     -->
+    <!-- See http://checkstyle.sf.net/config_javadoc.html#PackageHtml -->
+    <module name="PackageHtml"/>
+
+    <!-- Checks whether files end with a new line.                        -->
+    <!-- See http://checkstyle.sf.net/config_misc.html#NewlineAtEndOfFile -->
+    <module name="NewlineAtEndOfFile"/>
+
+    <!-- Checks that property files contain the same keys.         -->
+    <!-- See http://checkstyle.sf.net/config_misc.html#Translation -->
+    <module name="Translation"/>
+
+    <module name="TreeWalker">
+
+        <property name="cacheFile" value="${checkstyle.cache.file}"/>
+
+        <!-- ************************************************************** -->
+        <!-- Checks that differ from the Sun coding conventions             -->
+        <!-- ************************************************************** -->
+
+        <module name="Header">
+            <property name="headerFile" value="${basedir}/HEADER.txt"/>
+        </module>
+        <!-- <property name="tabWidth" value="4"/> -->
+        <module name="LeftCurly">
+          <property name="option" value="eol"/>
+        </module>
+        <module name="LineLength">
+          <property name="max" value="132"/>
+          <property name="ignorePattern" value="\* \$"/>
+        </module>
+        <module name="MethodLength">
+          <property name="max" value="175"/>
+        </module>
+        <module name="ConstantName">
+          <property name="format" value="log|^[a-zA-Z][a-zA-Z0-9_]*$"/>
+        </module>
+
+        <!-- ************************************************************** -->
+        <!-- Default Sun coding conventions checks                          -->
+        <!-- ************************************************************** -->
+
+        <!-- Checks for Javadoc comments.                     -->
+        <!-- See http://checkstyle.sf.net/config_javadoc.html -->
+        <module name="JavadocMethod"/>
+        <module name="JavadocType"/>
+        <module name="JavadocVariable"/>
+
+        <!-- Checks for Naming Conventions.                  -->
+        <!-- See http://checkstyle.sf.net/config_naming.html -->
+        <module name="LocalFinalVariableName"/>
+        <module name="LocalVariableName"/>
+        <module name="MethodName"/>
+        <module name="PackageName"/>
+        <module name="ParameterName"/>
+        <module name="StaticVariableName"/>
+        <module name="TypeName"/>
+        <module name="MemberName"/>
+
+        <!-- Checks for imports                              -->
+        <!-- See http://checkstyle.sf.net/config_import.html -->
+        <module name="AvoidStarImport"/>
+        <module name="IllegalImport"/> <!-- defaults to sun.* packages -->
+        <module name="RedundantImport"/>
+        <module name="UnusedImports"/>
+
+
+        <!-- Checks for Size Violations.                    -->
+        <!-- See http://checkstyle.sf.net/config_sizes.html -->
+        <module name="FileLength"/>
+        <module name="ParameterNumber"/>
+
+
+        <!-- Checks for whitespace                               -->
+        <!-- See http://checkstyle.sf.net/config_whitespace.html -->
+        <module name="EmptyForIteratorPad"/>
+        <module name="NoWhitespaceAfter"/>
+        <module name="NoWhitespaceBefore"/>
+        <module name="OperatorWrap"/>
+        <module name="TabCharacter"/>
+        <module name="WhitespaceAfter"/>
+        <module name="WhitespaceAround"/>
+
+
+        <!-- Modifier Checks                                    -->
+        <!-- See http://checkstyle.sf.net/config_modifiers.html -->
+        <module name="ModifierOrder"/>
+        <module name="RedundantModifier"/>
+
+
+        <!-- Checks for blocks. You know, those {}'s         -->
+        <!-- See http://checkstyle.sf.net/config_blocks.html -->
+        <module name="AvoidNestedBlocks"/>     
+        <module name="NeedBraces"/>
+
+        <!-- Checks for common coding problems               -->
+        <!-- See http://checkstyle.sf.net/config_coding.html -->
+        <!-- <module name="AvoidInlineConditionals"/> -->      <!-- DISABLED-->
+        <module name="DoubleCheckedLocking"/>
+        <module name="EqualsHashCode"/>
+        <module name="IllegalInstantiation"/>
+        <module name="InnerAssignment"/>
+        <module name="MissingSwitchDefault"/>
+        <module name="RedundantThrows">
+            <property name="allowUnchecked" value="true"/>   <!-- DISABLED -->
+            <property name="allowSubclasses" value="true"/>   <!-- DISABLED -->
+        </module>
+        <module name="SimplifyBooleanExpression"/>
+        <module name="SimplifyBooleanReturn"/>
+
+        <!-- Checks for class design                         -->
+        <!-- See http://checkstyle.sf.net/config_design.html -->
+        <module name="DesignForExtension">
+            <property name="severity" value="ignore"/>   <!-- DISABLED -->
+        </module>
+        <module name="HideUtilityClassConstructor"/>
+        <module name="InterfaceIsType"/>
+        <module name="VisibilityModifier">
+            <!-- Protected member variables are widely used in Jackrabbit -->
+            <property name="protectedAllowed" value="true"/>
+        </module>
+
+
+        <!-- Miscellaneous other checks.                   -->
+        <!-- See http://checkstyle.sf.net/config_misc.html -->
+        <module name="ArrayTypeStyle"/>
+        <module name="FinalParameters">
+            <property name="severity" value="ignore"/>   <!-- DISABLED -->
+        </module>
+        <module name="GenericIllegalRegexp">
+            <property name="format" value="\s+$"/>
+            <property name="message" value="Line has trailing spaces."/>
+        </module>
+        <module name="TodoComment"/>
+        <module name="UpperEll"/>
+
+    </module>
+    
+    <module name="SuppressionFilter">
+        <property name="file" value="checkstyle-suppressions.xml"/>
+    </module>    
+
+</module>

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/project.properties
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/project.properties?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/project.properties (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/project.properties Sun Jul  3 14:39:55 2005
@@ -0,0 +1,101 @@
+#  Copyright 2003-2005 The Apache Software Foundation or its licensors,
+#                      as applicable.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+######################################################################
+# Apache Central Repository
+######################################################################
+maven.repo.central=www.apache.org
+maven.repo.central.directory=/www/www.apache.org/dist/java-repository
+maven.remote.group=apcvs
+maven.changelog.factory = org.apache.maven.svnlib.SvnChangeLogFactory
+
+######################################################################
+# JUnit Testing
+######################################################################
+maven.test.failure = false
+maven.junit.fork=true
+#maven.junit.sysproperties=org.xml.sax.driver java.security.auth.login.config
+maven.junit.sysproperties=org.xml.sax.driver
+org.xml.sax.driver=org.apache.xerces.parsers.SAXParser
+#java.security.auth.login.config=applications/test/jaas.config
+
+
+#If you wish to skip tests when doing builds, uncomment
+#maven.test.skip = true
+
+######################################################################
+# Checkstyle
+######################################################################
+maven.checkstyle.properties= checkstyle.xml
+maven.linkcheck.enable=false 
+
+######################################################################
+# JavaDoc
+#
+# javadoc urls can be added here, multiple urls are appended using a comma
+#
+# maven.javadoc.links = http://foo/bar/api,\
+#                       http://flim/flam/api/
+######################################################################
+maven.javadoc.links=http://java.sun.com/j2se/1.4.2/docs/api/,http://www.day.com/maven/jsr170/javadocs/jcr-0.16.4.1/
+maven.javadoc.author=false
+maven.javadoc.version=false
+
+######################################################################
+# Other opts
+######################################################################
+# uncomment the next line to work in offline mode (no jar download & no linkcheck)
+#maven.mode.online=
+
+maven.compile.debug=on
+maven.compile.deprecation=off
+maven.compile.optimize=off
+maven.compile.source=1.4
+maven.compile.target=1.4
+
+maven.jarResources.basedir=src/java
+maven.jar.excludes=**/package.html
+
+# Location of the generated query language parsers. Needed for
+# the Maven Eclipse plugin to automatically locate the generated
+# source files. Note that this value matches the hardcoded path
+# in the Maven JavaCC plugin. Therefore, do not change this value!
+maven.gen.src=${maven.build.dir}/generated-src/main
+
+# Remote repository used for downloading dependencies. Append additional
+# repositories here for artifacts not available at www.ibiblio.org/maven/
+maven.repo.remote = http://www.ibiblio.org/maven/
+
+######################################################################
+# Site L&F
+######################################################################
+# maven.xdoc.jsl=
+maven.xdoc.date=
+maven.xdoc.poweredby.image=maven-feather.png
+maven.xdoc.version=${pom.currentVersion}
+maven.xdoc.developmentProcessUrl=http://incubator.apache.org/projects/jackrabbit.html
+maven.changelog.range=60
+maven.changelog.factory=org.apache.maven.svnlib.SvnChangeLogFactory
+
+# ------------------------------------------------------------------------
+# M A V E N  J A R  O V E R R I D E
+# ------------------------------------------------------------------------
+#maven.jar.override = on
+#maven.jar.jcr = ${basedir}/lib/jcr.jar
+
+######################################################################
+# Site Deploy (into ../jackrabbit-site for checkout on incubator.apache.org)
+######################################################################
+maven.site.deploy.method=fs

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/project.properties
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/project.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/project.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/project.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/project.xml Sun Jul  3 14:39:55 2005
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Copyright 2004-2005 The Apache Software Foundation or its licensors,
+                       as applicable.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<project>
+  <pomVersion>3</pomVersion>
+  <artifactId>jackrabbit-textfilters</artifactId>
+  <groupId>jackrabbit</groupId>
+  <id>jackrabbit</id>
+  <name>Jackrabbit - Text filters</name>
+  <currentVersion>1.0-dev</currentVersion>
+  <organization>
+    <name>The Apache Software Foundation</name>
+    <url>http://incubator.apache.org/projects/jackrabbit.html</url>
+    <logo>http://incubator.apache.org/images/apache-incubator-logo.png</logo>
+  </organization>
+  <package>org.apache.jackrabbit.*</package>
+  <logo>/images/jackrabbitlogo.gif</logo>
+  <url>http://incubator.apache.org/projects/jackrabbit.html</url>
+  <issueTrackingUrl>http://issues.apache.org/jira/browse/JCR</issueTrackingUrl>
+  <siteDirectory>../jackrabbit-site</siteDirectory>
+  <distributionSite>incubator.apache.org</distributionSite>
+  <distributionDirectory>/www/www.apache.org/dist/java-repository/</distributionDirectory>
+  <repository>
+    <connection>scm:subversion:http://svn.apache.org/repos/asf/incubator/jackrabbit/trunk</connection>
+    <developerConnection>scm:subversion:https://svn.apache.org/repos/asf/incubator/jackrabbit/trunk</developerConnection>
+    <url>http://svn.apache.org/viewcvs</url>
+  </repository>
+  <mailingLists>
+    <!--
+    <mailingList>
+      <name>Jackrabbit Users List</name>
+      <subscribe>user-subscribe@jackrabbit.apache.org</subscribe>
+      <unsubscribe>user-unsubscribe@jackrabbit.apache.org</unsubscribe>
+      <archive>http://mail-archives.apache.org/eyebrowse/SummarizeList?listName=user@jackrabbit.apache.org</archive>
+    </mailingList>
+-->
+    <mailingList>
+      <name>Jackrabbit Developer List</name>
+      <subscribe>jackrabbit-dev-subscribe at incubator.apache.org</subscribe>
+      <unsubscribe>jackrabbit-dev-unsubscribe at incubator.apache.org</unsubscribe>
+      <archive>http://incubator.apache.org/mail/jackrabbit-dev/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Jackrabbit Source Control List</name>
+      <subscribe>jackrabbit-commits-subscribe at incubator.apache.org</subscribe>
+      <unsubscribe>jackrabbit-commits-unsubscribe at incubator.apache.org</unsubscribe>
+      <archive>http://incubator.apache.org/mail/jackrabbit-commits/</archive>
+    </mailingList>
+  </mailingLists>
+  <developers>
+    <developer>
+      <name>Roy T. Fielding</name>
+      <id>fielding</id>
+      <organization>Day Software</organization>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <name>Stefan Guggisberg</name>
+      <id>stefan</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Stefano Mazzocchi</name>
+      <id>stefano</id>
+      <timezone>-5</timezone>
+    </developer>
+    <developer>
+      <name>David Nuescheler</name>
+      <id>uncled</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Dominique Pfister</name>
+      <id>dpfister</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Peeter Piegaze</name>
+      <id>ppiegaze</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Gianugo Rabellino</name>
+      <id>gianugo</id>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Tim Reilly</name>
+      <id>treilly</id>
+      <email>treilly at apache dot org</email>
+      <timezone>-5</timezone>
+    </developer>
+    <developer>
+      <name>Marcel Reutegger</name>
+      <id>mreutegg</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Paul Russell</name>
+      <id>prussell</id>
+      <timezone>+0</timezone>
+    </developer>
+    <developer>
+      <name>Andrew Savory</name>
+      <id>asavory</id>
+      <timezone>+0</timezone>
+    </developer>
+    <developer>
+      <name>Tobias Strasser</name>
+      <id>tripod</id>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Sylvain Wallez</name>
+      <id>sylvain</id>
+      <timezone>+1</timezone>
+    </developer>
+    <developer>
+      <name>Jukka Zitting</name>
+      <id>jukka</id>
+      <email>jz@yukatan.fi</email>
+      <organization>Yukatan</organization>
+      <timezone>+2</timezone>
+    </developer>
+  </developers>
+  <contributors>
+    <contributor>
+      <name>Serge Huber</name>
+      <timezone>+1</timezone>
+    </contributor>
+    <contributor>
+      <name>Felix Meschberger</name>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </contributor>
+    <contributor>
+      <name>Edgar Poce</name>
+      <email>edgarpoce@gmail.com</email>
+    </contributor>
+    <contributor>
+      <name>Angela Schreiber</name>
+      <organization>Day Software</organization>
+      <timezone>+1</timezone>
+    </contributor>
+  </contributors>
+  <licenses>
+    <license>
+      <name>The Apache Software License, Version 2.0</name>
+      <url>/LICENSE.txt</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+  <dependencies>
+    <!--
+      cqfs-jackrabbit and cqfs are optional runtime dependencies
+      (an alternative FileSystem implementation) and are not declared in
+      this list; commons-logging is a dependency of cqfs and is also
+      used directly by MsPowerPointTextFilter.
+    -->
+    <dependency>
+      <groupId>commons-collections</groupId>
+      <artifactId>commons-collections</artifactId>
+      <version>3.1</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>1.2.8</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.0</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>poi</groupId>
+      <artifactId>poi</artifactId>
+      <version>2.0-final-20040126</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>pdfbox</groupId>
+      <artifactId>pdfbox</artifactId>
+      <version>0.6.4</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>jackrabbit</groupId>
+      <artifactId>jackrabbit</artifactId>
+      <version>0.16.4.1-dev</version>
+      <type>jar</type>
+    </dependency>
+    <dependency>
+      <groupId>textmining</groupId>
+      <artifactId>tm-extractors</artifactId>
+      <version>0.4</version>
+      <type>jar</type>
+      <url>http://www.textmining.org</url>
+    </dependency>
+    <dependency>
+      <groupId>jsr170</groupId>
+      <artifactId>jcr</artifactId>
+      <version>0.16.4.1</version>
+      <type>jar</type>
+      <url>http://www.day.com/maven/jsr170/jars/jcr-0.16.4.1.jar</url>
+    </dependency>
+  </dependencies>
+  <build>
+    <sourceDirectory>src/java</sourceDirectory>
+    <unitTestSourceDirectory>src/test</unitTestSourceDirectory>
+    <unitTest>
+      <includes>
+        <include>**/*TestAll.java</include>
+      </includes>
+      <resources>
+        <resource>
+          <directory>src/test</directory>
+          <includes>
+            <include>**/*.xml</include>
+            <include>**/*.txt</include>
+          </includes>
+          <filtering>false</filtering>
+        </resource>
+      </resources>
+    </unitTest>
+    <!-- J A R  R E S O U R C E S -->
+    <!-- Resources that are packaged up inside the JAR file -->
+    <resources>
+      <resource>
+        <directory>src/java</directory>
+        <includes>
+          <include>**/*.xml</include>
+          <include>**/*.properties</include>
+          <include>**/*.TextFilterService</include>
+        </includes>
+        <filtering>false</filtering>
+      </resource>
+    </resources>
+  </build>
+  <reports>
+    <report>maven-changelog-plugin</report>
+    <report>maven-changes-plugin</report>
+    <!-- <report>maven-checkstyle-plugin</report> -->
+    <!-- <report>maven-clover-plugin</report> -->
+    <!-- <report>maven-developer-activity-plugin</report> -->
+    <!-- <report>maven-file-activity-plugin</report> -->
+    <report>maven-javadoc-plugin</report>
+    <!-- <report>maven-jellydoc-plugin</report> -->
+    <report>maven-junit-report-plugin</report>
+    <report>maven-jxr-plugin</report>
+    <report>maven-license-plugin</report>
+    <!-- <report>maven-linkcheck-plugin</report> -->
+    <!-- <report>maven-statcvs-plugin</report> -->
+    <report>maven-tasklist-plugin</report>
+  </reports>
+</project>
+

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService Sun Jul  3 14:39:55 2005
@@ -0,0 +1,24 @@
+# Copyright 2004-2005 The Apache Software Foundation or its licensors,
+#                     as applicable.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# This file lists all available TextFilter implementations that are shipped
+# with Jackrabbit.
+#
+
+org.apache.jackrabbit.core.query.MsExcelTextFilter
+org.apache.jackrabbit.core.query.MsWordTextFilter
+org.apache.jackrabbit.core.query.MsPowerPointTextFilter
+org.apache.jackrabbit.core.query.PdfTextFilter

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Extracts texts from MS Excel document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.MSExcelExtractor</code>
+ */
+public class MsExcelTextFilter implements TextFilter {
+
+    /**
+     * Tells whether this filter handles the given MIME type.
+     * @param mimeType MIME type to check; compared case-insensitively.
+     * @return <code>true</code> for <code>application/vnd.ms-excel</code>, <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.ms-excel".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * The text consists of the numeric and string cells of every sheet,
+     * each followed by a single space.
+     * @param data object containing MS Excel document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value
+     * (multi-value properties are not supported) or it does not contain
+     * valid MS Excel document.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            java.io.InputStream in = blob.getStream();
+            try {
+                HSSFWorkbook workbook = new HSSFWorkbook(new POIFSFileSystem(in));
+                CharArrayWriter writer = new CharArrayWriter();
+                for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
+                    appendSheetText(workbook.getSheetAt(i), writer);
+                }
+
+                Map result = new HashMap();
+                result.put(FieldNames.FULLTEXT, new CharArrayReader(writer.toCharArray()));
+                return result;
+            } finally {
+                // release the blob stream whether or not parsing succeeded
+                in.close();
+            }
+        } catch (IOException ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+
+    /**
+     * Appends the text of all numeric and string cells of a sheet to the
+     * writer, each followed by a single space.
+     */
+    private static void appendSheetText(HSSFSheet sheet, CharArrayWriter writer) {
+        for (Iterator rows = sheet.rowIterator(); rows.hasNext();) {
+            HSSFRow row = (HSSFRow) rows.next();
+            for (Iterator cells = row.cellIterator(); cells.hasNext();) {
+                HSSFCell cell = (HSSFCell) cells.next();
+                String text;
+                switch (cell.getCellType()) {
+                case HSSFCell.CELL_TYPE_NUMERIC:
+                    text = Double.toString(cell.getNumericCellValue());
+                    break;
+                case HSSFCell.CELL_TYPE_STRING:
+                    text = cell.getStringCellValue().trim();
+                    break;
+                default:
+                    // other cell types are not indexed
+                    text = "";
+                    break;
+                }
+                if (text.length() > 0) {
+                    writer.write(text, 0, text.length());
+                    writer.write(' ');
+                }
+            }
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Extracts texts from MS PowerPoint document binary data. Taken from Jakarta Slide
+ * class <code>org.apache.slide.extractor.MSPowerPointExtractor</code>
+ */
+public class MsPowerPointTextFilter implements TextFilter {
+	/** logger */
+	private static final Log log = LogFactory.getLog(MsPowerPointTextFilter.class);
+
+	/** Record type whose payload is copied to the output (TextBytesAtom). */
+	private static final int TEXT_BYTES_ATOM = 4008;
+
+	/**
+	 * Charset used to decode the extracted bytes. A TextBytesAtom stores the
+	 * low byte of each UTF-16 character whose high byte is zero, which is
+	 * exactly ISO-8859-1; the platform default charset must not be used.
+	 */
+	private static final String TEXT_BYTES_CHARSET = "ISO-8859-1";
+
+	/**
+	 * Listener that scans the "PowerPoint Document" stream for text records
+	 * and copies their bytes to the given output stream. Failures are logged
+	 * and otherwise ignored, so extraction is best-effort.
+	 */
+	private static class MsPowerPointListener implements POIFSReaderListener {
+		private final OutputStream os;
+
+		MsPowerPointListener(OutputStream os) {
+			this.os = os;
+		}
+
+		public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+			try {
+				if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
+					return;
+				}
+				DocumentInputStream input = event.getStream();
+				byte[] buffer = new byte[input.available()];
+				// a single read() call is not guaranteed to fill the buffer
+				int filled = 0;
+				while (filled < buffer.length) {
+					int count = input.read(buffer, filled, buffer.length - filled);
+					if (count <= 0) {
+						break;
+					}
+					filled += count;
+				}
+				for (int i = 0; i < filled - 20; i++) {
+					long type = LittleEndian.getUShort(buffer, i + 2);
+					long size = LittleEndian.getUInt(buffer, i + 4);
+					if (type != TEXT_BYTES_ATOM) {
+						continue;
+					}
+					if (i + 8L + size > filled) {
+						// the byte pattern matched by chance or the record is
+						// truncated: skip it instead of aborting the whole scan
+						continue;
+					}
+					// copies the three high bytes of the length field followed
+					// by the record payload, as the original extractor does
+					os.write(buffer, i + 4 + 1, (int) size + 3);
+					i = i + 4 + 1 + (int) size - 1;
+				}
+			} catch (Exception e) {
+				log.error("Unable to load read file", e);
+			}
+		}
+	}
+
+	/**
+	 * Tells whether this filter handles the given MIME type.
+	 * 
+	 * @param mimeType
+	 *            MIME type to check; compared case-insensitively.
+	 * @return <code>true</code> for <code>application/vnd.ms-powerpoint</code>
+	 *         or <code>application/mspowerpoint</code>, <code>false</code>
+	 *         otherwise.
+	 */
+	public boolean canFilter(String mimeType) {
+		return "application/vnd.ms-powerpoint".equalsIgnoreCase(mimeType)
+				|| "application/mspowerpoint".equalsIgnoreCase(mimeType);
+	}
+
+	/**
+	 * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+	 * 
+	 * @param data
+	 *            object containing MS PowerPoint document data.
+	 * @param encoding
+	 *            text encoding is not used, since it is specified in the data.
+	 * @return a map with a single Reader value for field
+	 *         {@link FieldNames#FULLTEXT}.
+	 * @throws RepositoryException
+	 *             if data does not hold exactly one value (multi-value
+	 *             properties are not supported) or reading the document
+	 *             fails with an I/O error.
+	 */
+	public Map doFilter(PropertyState data, String encoding)
+			throws RepositoryException {
+		InternalValue[] values = data.getValues();
+		if (values.length != 1) {
+			// multi value not supported
+			throw new RepositoryException(
+					"Multi-valued binary properties not supported.");
+		}
+		BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+		try {
+			ByteArrayOutputStream baos = new ByteArrayOutputStream();
+			POIFSReader reader = new POIFSReader();
+			reader.registerListener(new MsPowerPointListener(baos));
+
+			java.io.InputStream in = blob.getStream();
+			try {
+				reader.read(in);
+			} finally {
+				// release the blob stream whether or not parsing succeeded
+				in.close();
+			}
+
+			Map result = new HashMap();
+			result.put(FieldNames.FULLTEXT, new InputStreamReader(
+					new ByteArrayInputStream(baos.toByteArray()),
+					TEXT_BYTES_CHARSET));
+			return result;
+		} catch (IOException ex) {
+			throw new RepositoryException(ex);
+		}
+	}
+
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.textmining.text.extraction.WordExtractor;
+
+/**
+ * Extracts texts from MS Word document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.MSWordExtractor</code>
+ */
+public class MsWordTextFilter implements TextFilter {
+
+    /**
+     * Tells whether this filter handles the given MIME type.
+     * @param mimeType MIME type to check; compared case-insensitively.
+     * @return <code>true</code> for <code>application/vnd.ms-word</code> 
+     * or <code>application/msword</code>, <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.ms-word".equalsIgnoreCase(mimeType)
+                || "application/msword".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * @param data object containing MS Word document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value
+     * (multi-value properties are not supported) or it does not contain
+     * valid MS Word document.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            java.io.InputStream in = blob.getStream();
+            try {
+                // This throws raw Exception - not nice
+                String text = new WordExtractor().extractText(in);
+
+                Map result = new HashMap();
+                result.put(FieldNames.FULLTEXT, new StringReader(text));
+                return result;
+            } finally {
+                // release the blob stream whether or not extraction succeeded
+                in.close();
+            }
+        } catch (RepositoryException ex) {
+            // already the right type, do not wrap it a second time
+            throw ex;
+        } catch (Exception ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.pdfbox.pdfparser.PDFParser;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.util.PDFTextStripper;
+
+/**
+ * Extracts texts from Adobe PDF document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.PDFExtractor</code>
+ */
+public class PdfTextFilter implements TextFilter {
+
+    /**
+     * Tells whether this filter handles the given MIME type.
+     * @param mimeType MIME type to check; compared case-insensitively.
+     * @return <code>true</code> for <code>application/pdf</code>, <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/pdf".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * @param data object containing Adobe PDF document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value
+     * (multi-value properties are not supported) or it does not contain
+     * valid PDF document.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            java.io.InputStream in = blob.getStream();
+            try {
+                PDFParser parser = new PDFParser(in);
+                parser.parse();
+
+                PDDocument document = parser.getPDDocument();
+                try {
+                    CharArrayWriter writer = new CharArrayWriter();
+
+                    PDFTextStripper stripper = new PDFTextStripper();
+                    stripper.setLineSeparator("\n");
+                    stripper.writeText(document, writer);
+
+                    Map result = new HashMap();
+                    result.put(FieldNames.FULLTEXT, new CharArrayReader(writer.toCharArray()));
+                    return result;
+                } finally {
+                    // close the document even when text stripping fails
+                    document.close();
+                }
+            } finally {
+                // release the blob stream whether or not parsing succeeded
+                in.close();
+            }
+        } catch (IOException ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+import java.io.Reader;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.jackrabbit.core.QName;
+import org.apache.jackrabbit.core.query.TextFilter;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.InternalValue;
+
+/** Shared helper for the manual filter drivers: runs a TextFilter on a file and prints every field it returns. */
+public class AbstractTextFilterTest {
+	/** Wraps file in a one-element value array, passes it to filter.doFilter and prints each returned field to System.out. */
+	public void showResult(File file, TextFilter filter) throws Exception {
+		PropertyState state = new PropertyState(new QName("", ""), "", 1, true);
+		state.setValues(new InternalValue[] { InternalValue.create(file) });
+		// The encoding is read from the "encoding" system property and is null when that property is unset.
+		Map fields = filter.doFilter(state, System.getProperty("encoding"));
+		for (Iterator it = fields.entrySet().iterator(); it.hasNext();) {
+			Map.Entry entry = (Map.Entry) it.next();
+			Reader reader = (Reader) entry.getValue();
+			try {
+				System.out.println("---------------");
+				System.out.println("Field: " + entry.getKey());
+				for (int c = reader.read(); c != -1; c = reader.read()) {
+					System.out.print((char) c);
+				}
+				System.out.println("");
+			} finally {
+				reader.close(); // always release the reader, even when reading fails part-way
+			}
+		}
+	}
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsExcelTextFilter;
+
+
+/** Manual command-line driver that prints the fields MsExcelTextFilter returns for a file. */
+public class MSExcelTest extends AbstractTextFilterTest {
+
+	/** @param args args[0] is the path of the file handed to the filter */
+	public static void main(String[] args) throws Exception {
+		MSExcelTest driver = new MSExcelTest();
+		driver.showResult(new File(args[0]), new MsExcelTextFilter());
+	}
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsPowerPointTextFilter;
+
+/** Manual command-line driver that prints the fields MsPowerPointTextFilter returns for a file. */
+public class MSPowerPointTest extends AbstractTextFilterTest {
+	/** @param args args[0] is the path of the file handed to the filter */
+	public static void main(String[] args) throws Exception {
+		MSPowerPointTest driver = new MSPowerPointTest();
+		driver.showResult(new File(args[0]), new MsPowerPointTextFilter());
+	}
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsWordTextFilter;
+
+
+/** Manual command-line driver that prints the fields MsWordTextFilter returns for a file. */
+public class MsWordTest extends AbstractTextFilterTest {
+	/** @param args args[0] is the path of the file handed to the filter */
+	public static void main(String[] args) throws Exception {
+		MsWordTest driver = new MsWordTest();
+		driver.showResult(new File(args[0]), new MsWordTextFilter());
+	}
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java Sun Jul  3 14:39:55 2005
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.PdfTextFilter;
+
+
+/** Manual command-line driver that prints the fields PdfTextFilter returns for a file. */
+public class PdfTest extends AbstractTextFilterTest {
+	/** @param args args[0] is the path of the file handed to the filter */
+	public static void main(String[] args) throws Exception {
+		PdfTest driver = new PdfTest();
+		driver.showResult(new File(args[0]), new PdfTextFilter());
+	}
+}

Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java
------------------------------------------------------------------------------
    svn:eol-style = native