You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ed...@apache.org on 2005/07/03 23:39:56 UTC
svn commit: r208978 - in /incubator/jackrabbit/trunk/contrib/textfilters: ./
src/ src/java/ src/java/META-INF/ src/java/META-INF/services/ src/java/org/
src/java/org/apache/ src/java/org/apache/jackrabbit/
src/java/org/apache/jackrabbit/core/ src/java/...
Author: edgarpoce
Date: Sun Jul 3 14:39:55 2005
New Revision: 208978
URL: http://svn.apache.org/viewcvs?rev=208978&view=rev
Log:
Added textfilters contribution, thanks Ján Halasa!
Added:
incubator/jackrabbit/trunk/contrib/textfilters/
incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt (with props)
incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt (with props)
incubator/jackrabbit/trunk/contrib/textfilters/README.txt (with props)
incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml (with props)
incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml (with props)
incubator/jackrabbit/trunk/contrib/textfilters/project.properties (with props)
incubator/jackrabbit/trunk/contrib/textfilters/project.xml
incubator/jackrabbit/trunk/contrib/textfilters/src/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/test/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java (with props)
incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java (with props)
Added: incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt Sun Jul 3 14:39:55 2005
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
\ No newline at end of file
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/HEADER.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt Sun Jul 3 14:39:55 2005
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/LICENSE.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/README.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/README.txt?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/README.txt (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/README.txt Sun Jul 3 14:39:55 2005
@@ -0,0 +1,20 @@
+TextFilters allow Jackrabbit to extract text from binary
+properties for indexing purposes.
+
+This project contains TextFilter implementations for the
+following binary formats:
+
+1. MsExcel
+2. MsPowerPoint
+3. MsWord
+4. Pdf
+
+How to register the filters in Jackrabbit?
+Build the jar file and place it in the Jackrabbit
+classpath. The filters will be automatically loaded
+on startup.
+
+For further information, see the javadocs for:
+org.apache.jackrabbit.core.query.TextFilter
+org.apache.jackrabbit.core.query.TextFilterService
+
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/README.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml Sun Jul 3 14:39:55 2005
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE suppressions PUBLIC
+ "-//Puppy Crawl//DTD Suppressions 1.0//EN"
+ "http://www.puppycrawl.com/dtds/suppressions_1_0.dtd">
+
+<suppressions>
+ <!--
+ Suppressions for the generated JCRSQL parser
+ -->
+ <suppress checks=".*" files="(JJT)?JCRSQL.*.java"/>
+ <!-- Suppressions for the generated XPath parser -->
+ <suppress checks=".*" files="(JJT)?XPath.*.java"/>
+</suppressions>
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle-suppressions.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml Sun Jul 3 14:39:55 2005
@@ -0,0 +1,171 @@
+<?xml version="1.0"?>
+<!--
+ Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ as applicable.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<!DOCTYPE module PUBLIC
+ "-//Puppy Crawl//DTD Check Configuration 1.1//EN"
+ "http://www.puppycrawl.com/dtds/configuration_1_1.dtd">
+
+<!--
+ Checkstyle checks configured for Maven.
+-->
+
+<module name="Checker">
+
+ <!-- Checks that a package.html file exists for each package. -->
+ <!-- See http://checkstyle.sf.net/config_javadoc.html#PackageHtml -->
+ <module name="PackageHtml"/>
+
+ <!-- Checks whether files end with a new line. -->
+ <!-- See http://checkstyle.sf.net/config_misc.html#NewlineAtEndOfFile -->
+ <module name="NewlineAtEndOfFile"/>
+
+ <!-- Checks that property files contain the same keys. -->
+ <!-- See http://checkstyle.sf.net/config_misc.html#Translation -->
+ <module name="Translation"/>
+
+ <module name="TreeWalker">
+
+ <property name="cacheFile" value="${checkstyle.cache.file}"/>
+
+ <!-- ************************************************************** -->
+ <!-- Checks that are different from the sun coding conventions ones -->
+ <!-- ************************************************************** -->
+
+ <module name="Header">
+ <property name="headerFile" value="${basedir}/HEADER.txt"/>
+ </module>
+ <!-- <property name="tabWidth" value="4"/> -->
+ <module name="LeftCurly">
+ <property name="option" value="eol"/>
+ </module>
+ <module name="LineLength">
+ <property name="max" value="132"/>
+ <property name="ignorePattern" value="\* \$"/>
+ </module>
+ <module name="MethodLength">
+ <property name="max" value="175"/>
+ </module>
+ <module name="ConstantName">
+ <property name="format" value="log|^[a-zA-Z][a-zA-Z0-9_]*$"/>
+ </module>
+
+ <!-- ************************************************************** -->
+ <!-- Default Sun coding conventions checks -->
+ <!-- ************************************************************** -->
+
+ <!-- Checks for Javadoc comments. -->
+ <!-- See http://checkstyle.sf.net/config_javadoc.html -->
+ <module name="JavadocMethod"/>
+ <module name="JavadocType"/>
+ <module name="JavadocVariable"/>
+
+ <!-- Checks for Naming Conventions. -->
+ <!-- See http://checkstyle.sf.net/config_naming.html -->
+ <module name="LocalFinalVariableName"/>
+ <module name="LocalVariableName"/>
+ <module name="MethodName"/>
+ <module name="PackageName"/>
+ <module name="ParameterName"/>
+ <module name="StaticVariableName"/>
+ <module name="TypeName"/>
+ <module name="MemberName"/>
+
+ <!-- Checks for imports -->
+ <!-- See http://checkstyle.sf.net/config_import.html -->
+ <module name="AvoidStarImport"/>
+ <module name="IllegalImport"/> <!-- defaults to sun.* packages -->
+ <module name="RedundantImport"/>
+ <module name="UnusedImports"/>
+
+
+ <!-- Checks for Size Violations. -->
+ <!-- See http://checkstyle.sf.net/config_sizes.html -->
+ <module name="FileLength"/>
+ <module name="ParameterNumber"/>
+
+
+ <!-- Checks for whitespace -->
+ <!-- See http://checkstyle.sf.net/config_whitespace.html -->
+ <module name="EmptyForIteratorPad"/>
+ <module name="NoWhitespaceAfter"/>
+ <module name="NoWhitespaceBefore"/>
+ <module name="OperatorWrap"/>
+ <module name="TabCharacter"/>
+ <module name="WhitespaceAfter"/>
+ <module name="WhitespaceAround"/>
+
+
+ <!-- Modifier Checks -->
+ <!-- See http://checkstyle.sf.net/config_modifiers.html -->
+ <module name="ModifierOrder"/>
+ <module name="RedundantModifier"/>
+
+
+ <!-- Checks for blocks. You know, those {}'s -->
+ <!-- See http://checkstyle.sf.net/config_blocks.html -->
+ <module name="AvoidNestedBlocks"/>
+ <module name="NeedBraces"/>
+
+ <!-- Checks for common coding problems -->
+ <!-- See http://checkstyle.sf.net/config_coding.html -->
+ <!-- <module name="AvoidInlineConditionals"/> --> <!-- DISABLED-->
+ <module name="DoubleCheckedLocking"/>
+ <module name="EqualsHashCode"/>
+ <module name="IllegalInstantiation"/>
+ <module name="InnerAssignment"/>
+ <module name="MissingSwitchDefault"/>
+ <module name="RedundantThrows">
+ <property name="allowUnchecked" value="true"/> <!-- DISABLED -->
+ <property name="allowSubclasses" value="true"/> <!-- DISABLED -->
+ </module>
+ <module name="SimplifyBooleanExpression"/>
+ <module name="SimplifyBooleanReturn"/>
+
+ <!-- Checks for class design -->
+ <!-- See http://checkstyle.sf.net/config_design.html -->
+ <module name="DesignForExtension">
+ <property name="severity" value="ignore"/> <!-- DISABLED -->
+ </module>
+ <module name="HideUtilityClassConstructor"/>
+ <module name="InterfaceIsType"/>
+ <module name="VisibilityModifier">
+ <!-- Protected member variables are widely used in Jackrabbit -->
+ <property name="protectedAllowed" value="true"/>
+ </module>
+
+
+ <!-- Miscellaneous other checks. -->
+ <!-- See http://checkstyle.sf.net/config_misc.html -->
+ <module name="ArrayTypeStyle"/>
+ <module name="FinalParameters">
+ <property name="severity" value="ignore"/> <!-- DISABLED -->
+ </module>
+ <module name="GenericIllegalRegexp">
+ <property name="format" value="\s+$"/>
+ <property name="message" value="Line has trailing spaces."/>
+ </module>
+ <module name="TodoComment"/>
+ <module name="UpperEll"/>
+
+ </module>
+
+ <module name="SuppressionFilter">
+ <property name="file" value="checkstyle-suppressions.xml"/>
+ </module>
+
+</module>
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/checkstyle.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/project.properties
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/project.properties?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/project.properties (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/project.properties Sun Jul 3 14:39:55 2005
@@ -0,0 +1,101 @@
+# Copyright 2003-2005 The Apache Software Foundation or its licensors,
+# as applicable.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+######################################################################
+# Apache Central Repository
+######################################################################
+maven.repo.central=www.apache.org
+maven.repo.central.directory=/www/www.apache.org/dist/java-repository
+maven.remote.group=apcvs
+maven.changelog.factory = org.apache.maven.svnlib.SvnChangeLogFactory
+
+######################################################################
+# JUnit Testing
+######################################################################
+maven.test.failure = false
+maven.junit.fork=true
+#maven.junit.sysproperties=org.xml.sax.driver java.security.auth.login.config
+maven.junit.sysproperties=org.xml.sax.driver
+org.xml.sax.driver=org.apache.xerces.parsers.SAXParser
+#java.security.auth.login.config=applications/test/jaas.config
+
+
+# If you wish to skip tests when doing builds, uncomment the next line:
+#maven.test.skip = true
+
+######################################################################
+# Checkstyle
+######################################################################
+maven.checkstyle.properties= checkstyle.xml
+maven.linkcheck.enable=false
+
+######################################################################
+# JavaDoc
+#
+# Javadoc URLs can be added here; multiple URLs are separated by commas:
+#
+# maven.javadoc.links = http://foo/bar/api,\
+# http://flim/flam/api/
+######################################################################
+maven.javadoc.links=http://java.sun.com/j2se/1.4.2/docs/api/,http://www.day.com/maven/jsr170/javadocs/jcr-0.16.4.1/
+maven.javadoc.author=false
+maven.javadoc.version=false
+
+######################################################################
+# Other opts
+######################################################################
+# uncomment the next line to work in offline mode (no jar download & no linkcheck)
+#maven.mode.online=
+
+maven.compile.debug=on
+maven.compile.deprecation=off
+maven.compile.optimize=off
+maven.compile.source=1.4
+maven.compile.target=1.4
+
+maven.jarResources.basedir=src/java
+maven.jar.excludes=**/package.html
+
+# Location of the generated query language parsers. Needed for
+# the Maven Eclipse plugin to automatically locate the generated
+# source files. Note that this value matches the hardcoded path
+# in the Maven JavaCC plugin. Therefore, do not change this value!
+maven.gen.src=${maven.build.dir}/generated-src/main
+
+# specifying additional remote repository for downloading dependencies
+# not available at www.ibiblio.org/maven/
+maven.repo.remote = http://www.ibiblio.org/maven/
+
+######################################################################
+# Site L&F
+######################################################################
+# maven.xdoc.jsl=
+maven.xdoc.date=
+maven.xdoc.poweredby.image=maven-feather.png
+maven.xdoc.version=${pom.currentVersion}
+maven.xdoc.developmentProcessUrl=http://incubator.apache.org/projects/jackrabbit.html
+maven.changelog.range=60
+maven.changelog.factory=org.apache.maven.svnlib.SvnChangeLogFactory
+
+# ------------------------------------------------------------------------
+# M A V E N J A R O V E R R I D E
+# ------------------------------------------------------------------------
+#maven.jar.override = on
+#maven.jar.jcr = ${basedir}/lib/jcr.jar
+
+######################################################################
+# Site Deploy (into ../jackrabbit-site for checkout on incubator.apache.org)
+######################################################################
+maven.site.deploy.method=fs
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/project.properties
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/project.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/project.xml?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/project.xml (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/project.xml Sun Jul 3 14:39:55 2005
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ as applicable.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<project>
+ <pomVersion>3</pomVersion>
+ <artifactId>jackrabbit-textfilters</artifactId>
+ <groupId>jackrabbit</groupId>
+ <id>jackrabbit</id>
+ <name>Jackrabbit - Text filters</name>
+ <currentVersion>1.0-dev</currentVersion>
+ <organization>
+ <name>The Apache Software Foundation</name>
+ <url>http://incubator.apache.org/projects/jackrabbit.html</url>
+ <logo>http://incubator.apache.org/images/apache-incubator-logo.png</logo>
+ </organization>
+ <package>org.apache.jackrabbit.*</package>
+ <logo>/images/jackrabbitlogo.gif</logo>
+ <url>http://incubator.apache.org/projects/jackrabbit.html</url>
+ <issueTrackingUrl>http://issues.apache.org/jira/browse/JCR</issueTrackingUrl>
+ <siteDirectory>../jackrabbit-site</siteDirectory>
+ <distributionSite>incubator.apache.org</distributionSite>
+ <distributionDirectory>/www/www.apache.org/dist/java-repository/</distributionDirectory>
+ <repository>
+ <connection>scm:subversion:http://svn.apache.org/repos/asf/incubator/jackrabbit/trunk</connection>
+ <developerConnection>scm:subversion:https://svn.apache.org/repos/asf/incubator/jackrabbit/trunk</developerConnection>
+ <url>http://svn.apache.org/viewcvs</url>
+ </repository>
+ <mailingLists>
+ <!--
+ <mailingList>
+ <name>Jackrabbit Users List</name>
+ <subscribe>user-subscribe@jackrabbit.apache.org</subscribe>
+ <unsubscribe>user-unsubscribe@jackrabbit.apache.org</unsubscribe>
+ <archive>http://mail-archives.apache.org/eyebrowse/SummarizeList?listName=user@jackrabbit.apache.org</archive>
+ </mailingList>
+-->
+ <mailingList>
+ <name>Jackrabbit Developer List</name>
+ <subscribe>jackrabbit-dev-subscribe at incubator.apache.org</subscribe>
+ <unsubscribe>jackrabbit-dev-unsubscribe at incubator.apache.org</unsubscribe>
+ <archive>http://incubator.apache.org/mail/jackrabbit-dev/</archive>
+ </mailingList>
+ <mailingList>
+ <name>Jackrabbit Source Control List</name>
+ <subscribe>jackrabbit-commits-subscribe at incubator.apache.org</subscribe>
+ <unsubscribe>jackrabbit-commits-unsubscribe at incubator.apache.org</unsubscribe>
+ <archive>http://incubator.apache.org/mail/jackrabbit-commits/</archive>
+ </mailingList>
+ </mailingLists>
+ <developers>
+ <developer>
+ <name>Roy T. Fielding</name>
+ <id>fielding</id>
+ <organization>Day Software</organization>
+ <timezone>-8</timezone>
+ </developer>
+ <developer>
+ <name>Stefan Guggisberg</name>
+ <id>stefan</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Stefano Mazzocchi</name>
+ <id>stefano</id>
+ <timezone>-5</timezone>
+ </developer>
+ <developer>
+ <name>David Nuescheler</name>
+ <id>uncled</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Dominique Pfister</name>
+ <id>dpfister</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Peeter Piegaze</name>
+ <id>ppiegaze</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Gianugo Rabellino</name>
+ <id>gianugo</id>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Tim Reilly</name>
+ <id>treilly</id>
+ <email>treilly at apache dot org</email>
+ <timezone>-5</timezone>
+ </developer>
+ <developer>
+ <name>Marcel Reutegger</name>
+ <id>mreutegg</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Paul Russell</name>
+ <id>prussell</id>
+ <timezone>+0</timezone>
+ </developer>
+ <developer>
+ <name>Andrew Savory</name>
+ <id>asavory</id>
+ <timezone>+0</timezone>
+ </developer>
+ <developer>
+ <name>Tobias Strasser</name>
+ <id>tripod</id>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Sylvain Wallez</name>
+ <id>sylvain</id>
+ <timezone>+1</timezone>
+ </developer>
+ <developer>
+ <name>Jukka Zitting</name>
+ <id>jukka</id>
+ <email>jz@yukatan.fi</email>
+ <organization>Yukatan</organization>
+ <timezone>+2</timezone>
+ </developer>
+ </developers>
+ <contributors>
+ <contributor>
+ <name>Serge Huber</name>
+ <timezone>+1</timezone>
+ </contributor>
+ <contributor>
+ <name>Felix Meschberger</name>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </contributor>
+ <contributor>
+ <name>Edgar Poce</name>
+ <email>edgarpoce@gmail.com</email>
+ </contributor>
+ <contributor>
+ <name>Angela Schreiber</name>
+ <organization>Day Software</organization>
+ <timezone>+1</timezone>
+ </contributor>
+ </contributors>
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>/LICENSE.txt</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <dependencies>
+ <!--
+ poi (MS Excel and MS PowerPoint), tm-extractors (MS Word) and pdfbox (PDF)
+ are the extraction libraries used by the text filters;
+ commons-logging is used for logging by the MS PowerPoint filter
+ -->
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>3.1</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.8</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.0</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>2.0-final-20040126</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>pdfbox</groupId>
+ <artifactId>pdfbox</artifactId>
+ <version>0.6.4</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>jackrabbit</groupId>
+ <artifactId>jackrabbit</artifactId>
+ <version>0.16.4.1-dev</version>
+ <type>jar</type>
+ </dependency>
+ <dependency>
+ <groupId>textmining</groupId>
+ <artifactId>tm-extractors</artifactId>
+ <version>0.4</version>
+ <type>jar</type>
+ <url>http://www.textmining.org</url>
+ </dependency>
+ <dependency>
+ <groupId>jsr170</groupId>
+ <artifactId>jcr</artifactId>
+ <version>0.16.4.1</version>
+ <type>jar</type>
+ <url>http://www.day.com/maven/jsr170/jars/jcr-0.16.4.1.jar</url>
+ </dependency>
+ </dependencies>
+ <build>
+ <sourceDirectory>src/java</sourceDirectory>
+ <unitTestSourceDirectory>src/test</unitTestSourceDirectory>
+ <unitTest>
+ <includes>
+ <include>**/*TestAll.java</include>
+ </includes>
+ <resources>
+ <resource>
+ <directory>src/test</directory>
+ <includes>
+ <include>**/*.xml</include>
+ <include>**/*.txt</include>
+ </includes>
+ <filtering>false</filtering>
+ </resource>
+ </resources>
+ </unitTest>
+ <!-- J A R R E S O U R C E S -->
+ <!-- Resources that are packaged up inside the JAR file -->
+ <resources>
+ <resource>
+ <directory>src/java</directory>
+ <includes>
+ <include>**/*.xml</include>
+ <include>**/*.properties</include>
+ <include>**/*.TextFilterService</include>
+ </includes>
+ <filtering>false</filtering>
+ </resource>
+ </resources>
+ </build>
+ <reports>
+ <report>maven-changelog-plugin</report>
+ <report>maven-changes-plugin</report>
+ <!-- <report>maven-checkstyle-plugin</report> -->
+ <!-- <report>maven-clover-plugin</report> -->
+ <!-- <report>maven-developer-activity-plugin</report> -->
+ <!-- <report>maven-file-activity-plugin</report> -->
+ <report>maven-javadoc-plugin</report>
+ <!-- <report>maven-jellydoc-plugin</report> -->
+ <report>maven-junit-report-plugin</report>
+ <report>maven-jxr-plugin</report>
+ <report>maven-license-plugin</report>
+ <!-- <report>maven-linkcheck-plugin</report> -->
+ <!-- <report>maven-statcvs-plugin</report> -->
+ <report>maven-tasklist-plugin</report>
+ </reports>
+</project>
+
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService Sun Jul 3 14:39:55 2005
@@ -0,0 +1,24 @@
+# Copyright 2004-2005 The Apache Software Foundation or its licensors,
+# as applicable.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# This file lists all available TextFilter implementations that are shipped
+# with Jackrabbit.
+#
+
+org.apache.jackrabbit.core.query.MsExcelTextFilter
+org.apache.jackrabbit.core.query.MsWordTextFilter
+org.apache.jackrabbit.core.query.MsPowerPointTextFilter
+org.apache.jackrabbit.core.query.PdfTextFilter
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Extracts texts from MS Excel document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.MSExcelExtractor</code>
+ */
+public class MsExcelTextFilter implements TextFilter {
+
+    /**
+     * @param mimeType MIME type of the binary property, may be <code>null</code>.
+     * @return <code>true</code> for <code>application/vnd.ms-excel</code>
+     * (compared ignoring case), <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.ms-excel".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * Only numeric and string cells are extracted; every non-empty cell value
+     * is written followed by a single space.
+     * @param data object containing MS Excel document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value or
+     * an I/O error occurs while reading the document.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            CharArrayWriter writer = new CharArrayWriter();
+
+            // The stream is closed here even when POI gives up half-way through.
+            java.io.InputStream in = blob.getStream();
+            try {
+                POIFSFileSystem fs = new POIFSFileSystem(in);
+                HSSFWorkbook workbook = new HSSFWorkbook(fs);
+
+                for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
+                    HSSFSheet sheet = workbook.getSheetAt(i);
+
+                    Iterator rows = sheet.rowIterator();
+                    while (rows.hasNext()) {
+                        HSSFRow row = (HSSFRow) rows.next();
+
+                        Iterator cells = row.cellIterator();
+                        while (cells.hasNext()) {
+                            HSSFCell cell = (HSSFCell) cells.next();
+                            switch (cell.getCellType()) {
+                            case HSSFCell.CELL_TYPE_NUMERIC:
+                                String num = Double.toString(cell.getNumericCellValue()).trim();
+                                if (num.length() > 0) {
+                                    writer.write(num + " ");
+                                }
+                                break;
+                            case HSSFCell.CELL_TYPE_STRING:
+                                String text = cell.getStringCellValue().trim();
+                                if (text.length() > 0) {
+                                    writer.write(text + " ");
+                                }
+                                break;
+                            default:
+                                // cells of any other type are skipped
+                                break;
+                            }
+                        }
+                    }
+                }
+            } finally {
+                in.close();
+            }
+
+            Map result = new HashMap();
+            result.put(FieldNames.FULLTEXT, new CharArrayReader(writer.toCharArray()));
+            return result;
+        } catch (IOException ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+}
\ No newline at end of file
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsExcelTextFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Extracts texts from MS PowerPoint document binary data. Taken from Jakarta Slide
+ * class <code>org.apache.slide.extractor.MSPowerPointExtractor</code>
+ */
+public class MsPowerPointTextFilter implements TextFilter {
+    /** logger */
+    private static final Log log = LogFactory.getLog(MsPowerPointTextFilter.class);
+
+    /**
+     * Listener that copies the payload of every record of type 4008 found in
+     * the "PowerPoint Document" stream to the given output stream.
+     * Failures are logged and end the extraction; they are not propagated.
+     */
+    private static class MsPowerPointListener implements POIFSReaderListener {
+        private final OutputStream os;
+
+        MsPowerPointListener(OutputStream os) {
+            this.os = os;
+        }
+
+        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+            try {
+                if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
+                    return;
+                }
+                DocumentInputStream input = event.getStream();
+                byte[] buffer = new byte[input.available()];
+                input.read(buffer, 0, input.available());
+                // Byte-wise scan: a 16 bit type is read at i + 2 and a 32 bit
+                // length at i + 4 (both little endian).
+                for (int i = 0; i < buffer.length - 20; i++) {
+                    long type = LittleEndian.getUShort(buffer, i + 2);
+                    long size = LittleEndian.getUInt(buffer, i + 4);
+                    if (type == 4008) {
+                        // Copies the three upper length bytes plus the payload,
+                        // exactly as the original Slide extractor does.
+                        os.write(buffer, i + 4 + 1, (int) size + 3);
+                        i = i + 4 + 1 + (int) size - 1;
+                    }
+                }
+            } catch (Exception e) {
+                log.error("Unable to load read file", e);
+            }
+        }
+    }
+
+    /**
+     * @param mimeType MIME type of the binary property, may be <code>null</code>.
+     * @return <code>true</code> for <code>application/vnd.ms-powerpoint</code>
+     *         or <code>application/mspowerpoint</code> (compared ignoring case),
+     *         <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.ms-powerpoint".equalsIgnoreCase(mimeType)
+                || "application/mspowerpoint".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     *
+     * @param data
+     *            object containing MS PowerPoint document data.
+     * @param encoding
+     *            text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field
+     *         {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException
+     *             if data does not hold exactly one value or an I/O error
+     *             occurs while reading the document. Errors raised inside
+     *             the record listener are only logged.
+     */
+    public Map doFilter(PropertyState data, String encoding)
+            throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException(
+                    "Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            POIFSReader reader = new POIFSReader();
+            reader.registerListener(new MsPowerPointListener(baos));
+
+            // Always release the blob stream, also when POI fails.
+            java.io.InputStream in = blob.getStream();
+            try {
+                reader.read(in);
+            } finally {
+                in.close();
+            }
+
+            // Record type 4008 (TextBytesAtom in the PowerPoint binary format)
+            // stores one byte per character, so the bytes are decoded as
+            // ISO-8859-1 instead of the platform default charset.
+            Map result = new HashMap();
+            result.put(FieldNames.FULLTEXT, new InputStreamReader(
+                    new ByteArrayInputStream(baos.toByteArray()), "ISO-8859-1"));
+            return result;
+        } catch (IOException ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsPowerPointTextFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.textmining.text.extraction.WordExtractor;
+
+/**
+ * Extracts texts from MS Word document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.MSWordExtractor</code>
+ */
+public class MsWordTextFilter implements TextFilter {
+
+    /**
+     * @param mimeType MIME type of the binary property, may be <code>null</code>.
+     * @return <code>true</code> for <code>application/vnd.ms-word</code>
+     * or <code>application/msword</code> (compared ignoring case),
+     * <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.ms-word".equalsIgnoreCase(mimeType)
+                || "application/msword".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * @param data object containing MS Word document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value or
+     * the text extraction fails for any reason.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            String text;
+
+            // Always release the blob stream, also when the extractor fails.
+            java.io.InputStream in = blob.getStream();
+            try {
+                // This throws raw Exception - not nice
+                text = new WordExtractor().extractText(in);
+            } finally {
+                in.close();
+            }
+
+            Map result = new HashMap();
+            result.put(FieldNames.FULLTEXT, new StringReader(text));
+            return result;
+        } catch (Exception ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+}
\ No newline at end of file
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/MsWordTextFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.pdfbox.pdfparser.PDFParser;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.util.PDFTextStripper;
+
+/**
+ * Extracts texts from Adobe PDF document binary data.
+ * Taken from Jakarta Slide class
+ * <code>org.apache.slide.extractor.PDFExtractor</code>
+ */
+public class PdfTextFilter implements TextFilter {
+
+    /**
+     * @param mimeType MIME type of the binary property, may be <code>null</code>.
+     * @return <code>true</code> for <code>application/pdf</code>
+     * (compared ignoring case), <code>false</code> otherwise.
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/pdf".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
+     * The text is extracted with <code>\n</code> as line separator.
+     * @param data object containing Adobe PDF document data.
+     * @param encoding text encoding is not used, since it is specified in the data.
+     * @return a map with a single Reader value for field {@link FieldNames#FULLTEXT}.
+     * @throws RepositoryException if data does not hold exactly one value or
+     * an I/O error occurs while parsing the document.
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length != 1) {
+            // multi value not supported
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+
+        try {
+            java.io.InputStream in = blob.getStream();
+            try {
+                PDFParser parser = new PDFParser(in);
+                parser.parse();
+
+                PDDocument document = parser.getPDDocument();
+                try {
+                    CharArrayWriter writer = new CharArrayWriter();
+
+                    PDFTextStripper stripper = new PDFTextStripper();
+                    stripper.setLineSeparator("\n");
+                    stripper.writeText(document, writer);
+
+                    Map result = new HashMap();
+                    result.put(FieldNames.FULLTEXT, new CharArrayReader(writer.toCharArray()));
+                    return result;
+                } finally {
+                    // release the parsed document even if text stripping failed
+                    document.close();
+                }
+            } finally {
+                in.close();
+            }
+        } catch (IOException ex) {
+            throw new RepositoryException(ex);
+        }
+    }
+}
\ No newline at end of file
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/java/org/apache/jackrabbit/core/query/PdfTextFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+import java.io.Reader;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.jackrabbit.core.QName;
+import org.apache.jackrabbit.core.query.TextFilter;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.InternalValue;
+
+/**
+ * Base class of the command line drivers for the text filters: runs a filter
+ * against a file and prints every returned field to standard output.
+ */
+public class AbstractTextFilterTest {
+
+    /**
+     * Runs the filter on the given file and prints the name and the content
+     * of each field it returns. The encoding handed to the filter is taken
+     * from the <code>encoding</code> system property.
+     * @param file file whose content is set as the only value of the property
+     * passed to the filter.
+     * @param filter the filter to run.
+     * @throws Exception if the filter fails or a returned reader cannot be read.
+     */
+    public void showResult(File file, TextFilter filter) throws Exception {
+        PropertyState state = new PropertyState(new QName("", ""), "", 1, true);
+        state.setValues(new InternalValue[] { InternalValue.create(file) });
+
+        Map fields = filter.doFilter(state, System.getProperty("encoding"));
+        for (Iterator it = fields.entrySet().iterator(); it.hasNext();) {
+            Map.Entry entry = (Map.Entry) it.next();
+            Reader r = (Reader) entry.getValue();
+            System.out.println("---------------");
+            System.out.println("Field: " + entry.getKey());
+            try {
+                int c;
+                while ((c = r.read()) != -1) {
+                    System.out.print((char) c);
+                }
+            } finally {
+                // close the reader even when reading fails
+                r.close();
+            }
+            System.out.println("");
+        }
+    }
+
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/AbstractTextFilterTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsExcelTextFilter;
+
+
+public class MSExcelTest extends AbstractTextFilterTest {
+
+ public static void main(String[] args) throws Exception {
+ MSExcelTest test = new MSExcelTest();
+ File file = new File(args[0]);
+ test.showResult(file, new MsExcelTextFilter());
+ }
+
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSExcelTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsPowerPointTextFilter;
+
+/**
+ * Command-line driver that hands a single file and a new
+ * {@link MsPowerPointTextFilter} to the inherited showResult method.
+ */
+public class MSPowerPointTest extends AbstractTextFilterTest {
+
+ /**
+ * Passes the file named by the first argument, together with a new
+ * {@link MsPowerPointTextFilter}, to showResult.
+ *
+ * @param args args[0] is the path of the file to filter;
+ * any further arguments are ignored
+ * @throws IllegalArgumentException if no file path is given
+ * @throws Exception propagated from showResult
+ */
+ public static void main(String[] args) throws Exception {
+ // Report a usage error rather than an ArrayIndexOutOfBoundsException.
+ if (args.length < 1) {
+ throw new IllegalArgumentException("Usage: MSPowerPointTest path-to-file");
+ }
+ MSPowerPointTest test = new MSPowerPointTest();
+ File file = new File(args[0]);
+ test.showResult(file, new MsPowerPointTextFilter());
+ }
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MSPowerPointTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.MsWordTextFilter;
+
+
+/**
+ * Command-line driver that hands a single file and a new
+ * {@link MsWordTextFilter} to the inherited showResult method.
+ */
+public class MsWordTest extends AbstractTextFilterTest {
+
+ /**
+ * Passes the file named by the first argument, together with a new
+ * {@link MsWordTextFilter}, to showResult.
+ *
+ * @param args args[0] is the path of the file to filter;
+ * any further arguments are ignored
+ * @throws IllegalArgumentException if no file path is given
+ * @throws Exception propagated from showResult
+ */
+ public static void main(String[] args) throws Exception {
+ // Report a usage error rather than an ArrayIndexOutOfBoundsException.
+ if (args.length < 1) {
+ throw new IllegalArgumentException("Usage: MsWordTest path-to-file");
+ }
+ MsWordTest test = new MsWordTest();
+ File file = new File(args[0]);
+ test.showResult(file, new MsWordTextFilter());
+ }
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/MsWordTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java?rev=208978&view=auto
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java (added)
+++ incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java Sun Jul 3 14:39:55 2005
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.PdfTextFilter;
+
+
+/**
+ * Command-line driver that hands a single file and a new
+ * {@link PdfTextFilter} to the inherited showResult method.
+ */
+public class PdfTest extends AbstractTextFilterTest {
+
+ /**
+ * Passes the file named by the first argument, together with a new
+ * {@link PdfTextFilter}, to showResult.
+ *
+ * @param args args[0] is the path of the file to filter;
+ * any further arguments are ignored
+ * @throws IllegalArgumentException if no file path is given
+ * @throws Exception propagated from showResult
+ */
+ public static void main(String[] args) throws Exception {
+ // Report a usage error rather than an ArrayIndexOutOfBoundsException.
+ if (args.length < 1) {
+ throw new IllegalArgumentException("Usage: PdfTest path-to-file");
+ }
+ PdfTest test = new PdfTest();
+ File file = new File(args[0]);
+ test.showResult(file, new PdfTextFilter());
+ }
+}
Propchange: incubator/jackrabbit/trunk/contrib/textfilters/src/test/org/apache/jackrabbit/core/query/test/PdfTest.java
------------------------------------------------------------------------------
svn:eol-style = native