You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/01/18 22:08:05 UTC
svn commit: r1435342 - in /nutch/trunk: CHANGES.txt
src/test/org/apache/nutch/indexer/TestIndexingFilters.java
Author: lewismc
Date: Fri Jan 18 21:08:04 2013
New Revision: 1435342
URL: http://svn.apache.org/viewvc?rev=1435342&view=rev
Log:
NUTCH-1453 Substantiate tests for IndexingFilters
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1435342&r1=1435341&r2=1435342&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jan 18 21:08:04 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc)
+
* NUTCH-840 Port tests from parse-html to parse-tika (lewismc, jnioche)
* NUTCH-1509 Implement read/write in NutchField (markus)
Modified: nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1435342&r1=1435341&r2=1435342&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 18 21:08:04 2013
@@ -37,14 +37,68 @@ public class TestIndexingFilters extends
*/
public void testNonExistingIndexingFilter() throws IndexingException {
Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
String class1 = "NonExistingFilter";
String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
IndexingFilters filters = new IndexingFilters(conf);
filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
- new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
- "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+ new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
+ "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+ }
+
+ /**
+ * Test behaviour when NutchDocument is null
+ */
+
+ public void testNutchDocumentNullIndexingFilter() throws IndexingException{
+ Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
+ IndexingFilters filters = new IndexingFilters(conf);
+ NutchDocument doc = filters.filter(null, new ParseImpl("text", new ParseData(
+ new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
+ "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+
+ assertNull(doc);
+ }
+
+ /**
+ * Test that resetting the index filter order will not take effect
+ *
+ * @throws IndexingException
+ */
+ public void testFilterCacheIndexingFilter() throws IndexingException{
+ Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
+ String class1 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
+ conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1);
+
+ IndexingFilters filters1 = new IndexingFilters(conf);
+ NutchDocument fdoc1 = filters1.filter(new NutchDocument(),new ParseImpl("text",new ParseData(
+ new ParseStatus(),"title",new Outlink[0],new Metadata())),new Text("http://www.example.com/"),
+ new CrawlDatum(),new Inlinks());
+
+ // add another index filter
+ String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer";
+ // set content metadata
+ Metadata md = new Metadata();
+ md.add("example","data");
+ // set content metadata property defined in MetadataIndexer
+ conf.set("index.content.md","example");
+ // add MetadataIndexer filter
+ conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
+ IndexingFilters filters2 = new IndexingFilters(conf);
+ NutchDocument fdoc2 = filters2.filter(new NutchDocument(),new ParseImpl("text",new ParseData(
+ new ParseStatus(),"title",new Outlink[0],md)),new Text("http://www.example.com/"),
+ new CrawlDatum(),new Inlinks());
+ assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size());
}
}