You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2015/01/07 23:25:19 UTC
svn commit: r1650181 - in /nutch/trunk: CHANGES.txt
src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
Author: snagel
Date: Wed Jan 7 22:25:18 2015
New Revision: 1650181
URL: http://svn.apache.org/r1650181
Log:
NUTCH-1140 index-more plugin, resetTitle creates multiple values in title field
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1650181&r1=1650180&r2=1650181&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Jan 7 22:25:18 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1140 index-more plugin, resetTitle creates multiple values in title field (Joe Liedtke, kaveh minooie via snagel)
+
* NUTCH-1904 Schema for Solr4 doesn't include _version_ field (mattmann)
* NUTCH-1897 Easier debugging of plugin XML errors (markus)
Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=1650181&r1=1650180&r2=1650181&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Wed Jan 7 22:25:18 2015
@@ -289,7 +289,7 @@ public class MoreIndexingFilter implemen
private NutchDocument resetTitle(NutchDocument doc, ParseData data, String url) {
String contentDisposition = data.getMeta(Metadata.CONTENT_DISPOSITION);
- if (contentDisposition == null)
+ if (contentDisposition == null || doc.getFieldValue("title") != null)
return doc;
for (int i=0; i<patterns.length; i++) {
Modified: nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java?rev=1650181&r1=1650180&r2=1650181&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java Wed Jan 7 22:25:18 2015
@@ -82,11 +82,21 @@ public class TestMoreIndexingFilter {
MoreIndexingFilter filter = new MoreIndexingFilter();
filter.setConf(conf);
- NutchDocument doc = filter.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
- new ParseStatus(), "title", new Outlink[0], metadata)), new Text(
- "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+ Text url = new Text("http://www.example.com/");
+ ParseImpl parseImpl = new ParseImpl("text", new ParseData(
+ new ParseStatus(), "title", new Outlink[0], metadata));
+
+ NutchDocument doc = new NutchDocument();
+ doc = filter.filter(doc, parseImpl, url, new CrawlDatum(), new Inlinks());
Assert.assertEquals("content-disposition not detected", "filename.ext", doc.getFieldValue("title"));
+
+ /* NUTCH-1140: do not add second title to avoid a multi-valued title field */
+ doc = new NutchDocument();
+ doc.add("title", "title");
+ doc = filter.filter(doc, parseImpl, url, new CrawlDatum(), new Inlinks());
+ Assert.assertEquals("do not add second title by content-disposition",
+ "title", doc.getFieldValue("title"));
}
private void assertParts(String[] parts, int count, String... expected) {