You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2010/09/21 03:53:52 UTC
svn commit: r999181 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml
src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Author: mattmann
Date: Tue Sep 21 01:53:51 2010
New Revision: 999181
URL: http://svn.apache.org/viewvc?rev=999181&view=rev
Log:
- fix for NUTCH-901 Make index-more plug-in configurable
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=999181&r1=999180&r2=999181&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Sep 21 01:53:51 2010
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.0 - Current Development
+* NUTCH-901 Make index-more plug-in configurable (Markus Jelsma via mattmann)
+
* NUTCH-862 HttpClient null pointer exception (Sebastian Nagel via ab)
* NUTCH-904 "-resume" option is always processed as "false" in FetcherJob
Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=999181&r1=999180&r2=999181&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Tue Sep 21 01:53:51 2010
@@ -713,6 +713,19 @@
</description>
</property>
+
+<!-- moreindexingfilter plugin properties -->
+
+<property>
+ <name>moreIndexingFilter.indexMimeTypeParts</name>
+ <value>true</value>
+ <description>Determines whether the index-more plugin will split the mime-type
+ into sub-parts; this requires the type field to be multi-valued. Set to true for backward
+ compatibility. When set to false, the mime-type is not split.
+ </description>
+</property>
+
+
<!-- URL normalizer properties -->
<property>
Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=999181&r1=999180&r2=999181&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Tue Sep 21 01:53:51 2010
@@ -199,10 +199,13 @@ public class MoreIndexingFilter implemen
doc.add("type", scontentType);
- String[] parts = getParts(scontentType);
+ // Check if we need to split the content type in sub parts
+ if (conf.getBoolean("moreIndexingFilter.indexMimeTypeParts", true)) {
+ String[] parts = getParts(contentType);
- for (String part : parts) {
- doc.add("type", part);
+ for(String part: parts) {
+ doc.add("type", part);
+ }
}
// leave this for future improvement