You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by un...@apache.org on 2012/12/07 11:41:45 UTC
svn commit: r1418267 - in /jackrabbit/branches/2.4/jackrabbit-core/src:
main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java
Author: unico
Date: Fri Dec 7 10:41:43 2012
New Revision: 1418267
URL: http://svn.apache.org/viewvc?rev=1418267&view=rev
Log:
JCR-3476 backport: only extract binary values when parser supports extracting them
Modified:
jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java
Modified: jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?rev=1418267&r1=1418266&r2=1418267&view=diff
==============================================================================
--- jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original)
+++ jackrabbit/branches/2.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Fri Dec 7 10:41:43 2012
@@ -45,6 +45,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -97,6 +98,11 @@ public class NodeIndexer {
private final Parser parser;
/**
+ * The media types supported by the parser used.
+ */
+ private Set<MediaType> supportedMediaTypes;
+
+ /**
* The indexing configuration or <code>null</code> if none is available.
*/
protected IndexingConfiguration indexingConfig;
@@ -448,7 +454,7 @@ public class NodeIndexer {
* <p/>
* This implementation checks if this {@link #node} is of type nt:resource
* and if that is the case, tries to extract text from the binary property
- * using the {@link #extractor}.
+ * using the {@link #parser}.
*
* @param doc The document to which to add the field
* @param fieldName The name of the field to add
@@ -466,7 +472,7 @@ public class NodeIndexer {
}
InternalValue type = getValue(NameConstants.JCR_MIMETYPE);
- if (type != null) {
+ if (type != null && isSupportedMediaType(type.getString())) {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, type.getString());
@@ -681,7 +687,7 @@ public class NodeIndexer {
* @param doc The document to which to add the field
* @param fieldName The name of the field to add
* @param internalValue The value for the field to add to the document.
- * @deprecated Use {@link #addStringValue(Document, String, Object, boolean)
+ * @deprecated Use {@link #addStringValue(Document, String, String, boolean)
* addStringValue(Document, String, Object, boolean)} instead.
*/
protected void addStringValue(Document doc, String fieldName, String internalValue) {
@@ -719,7 +725,7 @@ public class NodeIndexer {
* tokenized and added to the node scope fulltext
* index.
* @param boost the boost value for this string field.
- * @deprecated use {@link #addStringValue(Document, String, Object, boolean, boolean, float, boolean)} instead.
+ * @deprecated use {@link #addStringValue(Document, String, String, boolean, boolean, float, boolean)} instead.
*/
protected void addStringValue(Document doc, String fieldName,
String internalValue, boolean tokenized,
@@ -903,6 +909,20 @@ public class NodeIndexer {
}
/**
+ * Returns <code>true</code> if the provided type is among the types
+ * supported by the Tika parser we are using.
+ *
+ * @param type the type to check.
+ * @return whether the type is supported by the Tika parser we are using.
+ */
+ protected boolean isSupportedMediaType(final String type) {
+ if (supportedMediaTypes == null) {
+ supportedMediaTypes = parser.getSupportedTypes(null);
+ }
+ return supportedMediaTypes.contains(MediaType.parse(type));
+ }
+
+ /**
* Returns the boost value for the given property name.
*
* @param propertyName the name of a property.
Modified: jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java?rev=1418267&r1=1418266&r2=1418267&view=diff
==============================================================================
--- jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java (original)
+++ jackrabbit/branches/2.4/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/FulltextQueryTest.java Fri Dec 7 10:41:43 2012
@@ -343,7 +343,7 @@ public class FulltextQueryTest extends A
assertFileContains(
"test.txt", "text/plain", "AE502DBEA2C411DEBD340AD156D89593");
assertFileContains(
- "test.rtf", "text/rtf", "quick brown fox");
+ "test.rtf", "application/rtf", "quick brown fox");
}
private void assertFileContains(