You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/01/18 22:08:05 UTC

svn commit: r1435342 - in /nutch/trunk: CHANGES.txt src/test/org/apache/nutch/indexer/TestIndexingFilters.java

Author: lewismc
Date: Fri Jan 18 21:08:04 2013
New Revision: 1435342

URL: http://svn.apache.org/viewvc?rev=1435342&view=rev
Log:
NUTCH-1453 Substantiate tests for IndexingFilters

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1435342&r1=1435341&r2=1435342&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jan 18 21:08:04 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk): Current Development
 
+* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc)
+
 * NUTCH-840  Port tests from parse-html to parse-tika (lewismc, jnioche)
 
 * NUTCH-1509 Implement read/write in NutchField (markus)

Modified: nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1435342&r1=1435341&r2=1435342&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 18 21:08:04 2013
@@ -37,14 +37,68 @@ public class TestIndexingFilters extends
    */
   public void testNonExistingIndexingFilter() throws IndexingException {
     Configuration conf = NutchConfiguration.create();
+      conf.addResource("nutch-default.xml");
+      conf.addResource("crawl-tests.xml");
+
     String class1 = "NonExistingFilter";
     String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
     conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
 
     IndexingFilters filters = new IndexingFilters(conf);
     filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
-        new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
-        "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+      new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
+      "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+  }
+
+  /**
+   * Test behaviour when NutchDocument is null
+   */
+
+  public void testNutchDocumentNullIndexingFilter() throws IndexingException{
+    Configuration conf = NutchConfiguration.create();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("crawl-tests.xml");
+
+    IndexingFilters filters = new IndexingFilters(conf);
+    NutchDocument doc = filters.filter(null, new ParseImpl("text", new ParseData(
+      new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
+      "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+     
+    assertNull(doc);
+  }
+
+  /**
+   * Test that resetting the index filter order on the same configuration does not take effect
+   *
+   * @throws IndexingException
+   */
+  public void testFilterCacheIndexingFilter() throws IndexingException{
+    Configuration conf = NutchConfiguration.create();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("crawl-tests.xml");
+
+    String class1 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
+    conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1);
+
+    IndexingFilters filters1 = new IndexingFilters(conf);
+    NutchDocument fdoc1 = filters1.filter(new NutchDocument(),new ParseImpl("text",new ParseData(
+      new ParseStatus(),"title",new Outlink[0],new Metadata())),new Text("http://www.example.com/"),
+      new CrawlDatum(),new Inlinks());
+
+    // add another index filter
+    String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer";
+    // set content metadata
+    Metadata md = new Metadata();
+    md.add("example","data");
+    // set content metadata property defined in MetadataIndexer
+    conf.set("index.content.md","example");
+    // add MetadataIndexer filter
+    conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
+    IndexingFilters filters2 = new IndexingFilters(conf);
+    NutchDocument fdoc2 = filters2.filter(new NutchDocument(),new ParseImpl("text",new ParseData(
+      new ParseStatus(),"title",new Outlink[0],md)),new Text("http://www.example.com/"),
+      new CrawlDatum(),new Inlinks());
+    assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size());
   }
 
 }