You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by un...@apache.org on 2012/12/07 11:41:45 UTC

svn commit: r1418267 - in /jackrabbit/branches/2.4/jackrabbit-core/src: main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java

Author: unico
Date: Fri Dec  7 10:41:43 2012
New Revision: 1418267

URL: http://svn.apache.org/viewvc?rev=1418267&view=rev
Log:
JCR-3476 backport: only extract binary values when parser supports extracting them

Modified:
    jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
    jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java

Modified: jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?rev=1418267&r1=1418266&r2=1418267&view=diff
==============================================================================
--- jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original)
+++ jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Fri Dec  7 10:41:43 2012
@@ -45,6 +45,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.Parser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -97,6 +98,11 @@ public class NodeIndexer {
     private final Parser parser;
 
     /**
+     * The media types supported by the parser used.
+     */
+    private Set<MediaType> supportedMediaTypes;
+
+    /**
      * The indexing configuration or <code>null</code> if none is available.
      */
     protected IndexingConfiguration indexingConfig;
@@ -448,7 +454,7 @@ public class NodeIndexer {
      * <p/>
      * This implementation checks if this {@link #node} is of type nt:resource
      * and if that is the case, tries to extract text from the binary property
-     * using the {@link #extractor}.
+     * using the {@link #parser}.
      *
      * @param doc           The document to which to add the field
      * @param fieldName     The name of the field to add
@@ -466,7 +472,7 @@ public class NodeIndexer {
             }
 
             InternalValue type = getValue(NameConstants.JCR_MIMETYPE);
-            if (type != null) {
+            if (type != null && isSupportedMediaType(type.getString())) {
                 Metadata metadata = new Metadata();
                 metadata.set(Metadata.CONTENT_TYPE, type.getString());
 
@@ -681,7 +687,7 @@ public class NodeIndexer {
      * @param doc           The document to which to add the field
      * @param fieldName     The name of the field to add
      * @param internalValue The value for the field to add to the document.
-     * @deprecated Use {@link #addStringValue(Document, String, Object, boolean)
+     * @deprecated Use {@link #addStringValue(Document, String, String, boolean)
      *             addStringValue(Document, String, Object, boolean)} instead.
      */
     protected void addStringValue(Document doc, String fieldName, String internalValue) {
@@ -719,7 +725,7 @@ public class NodeIndexer {
      *                           tokenized and added to the node scope fulltext
      *                           index.
      * @param boost              the boost value for this string field.
-     * @deprecated use {@link #addStringValue(Document, String, Object, boolean, boolean, float, boolean)} instead.
+     * @deprecated use {@link #addStringValue(Document, String, String, boolean, boolean, float, boolean)} instead.
      */
     protected void addStringValue(Document doc, String fieldName,
                                   String internalValue, boolean tokenized,
@@ -903,6 +909,20 @@ public class NodeIndexer {
     }
 
     /**
+     * Returns <code>true</code> if the provided type is among the types
+     * supported by the Tika parser we are using.
+     *
+     * @param type  the type to check.
+     * @return whether the type is supported by the Tika parser we are using.
+     */
+    protected boolean isSupportedMediaType(final String type) {
+        if (supportedMediaTypes == null) {
+            supportedMediaTypes = parser.getSupportedTypes(null);
+        }
+        return supportedMediaTypes.contains(MediaType.parse(type));
+    }
+
+    /**
      * Returns the boost value for the given property name.
      *
      * @param propertyName the name of a property.

Modified: jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java?rev=1418267&r1=1418266&r2=1418267&view=diff
==============================================================================
--- jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java (original)
+++ jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java Fri Dec  7 10:41:43 2012
@@ -343,7 +343,7 @@ public class FulltextQueryTest extends A
         assertFileContains(
                 "test.txt", "text/plain", "AE502DBEA2C411DEBD340AD156D89593");
         assertFileContains(
-                "test.rtf", "text/rtf", "quick brown fox");
+                "test.rtf", "application/rtf", "quick brown fox");
     }
 
     private void assertFileContains(