You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/08/23 00:28:12 UTC
svn commit: r1619944 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/indexer/IndexingFilter.java
Author: snagel
Date: Fri Aug 22 22:28:12 2014
New Revision: 1619944
URL: http://svn.apache.org/r1619944
Log:
NUTCH-1775 IndexingFilter: document origin of passed CrawlDatum
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1619944&r1=1619943&r2=1619944&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Aug 22 22:28:12 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1775 IndexingFilter: document origin of passed CrawlDatum (snagel)
+
* NUTCH-1693 TextMD5Signature computed on textual content (Tien Nguyen Manh, markus via snagel)
* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java?rev=1619944&r1=1619943&r2=1619944&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java Fri Aug 22 22:28:12 2014
@@ -39,15 +39,22 @@ public interface IndexingFilter extends
/**
* Adds fields or otherwise modifies the document that will be indexed for a
- * parse. Unwanted documents can be removed from indexing by returning a null value.
+ * parse. Unwanted documents can be removed from indexing by returning a null
+ * value.
*
- * @param doc document instance for collecting fields
- * @param parse parse data instance
- * @param url page url
- * @param datum crawl datum for the page
- * @param inlinks page inlinks
- * @return modified (or a new) document instance, or null (meaning the document
- * should be discarded)
+ * @param doc
+ * document instance for collecting fields
+ * @param parse
+ * parse data instance
+ * @param url
+ * page url
+ * @param datum
+ * crawl datum for the page (fetch datum from segment containing
+ * fetch status and fetch time)
+ * @param inlinks
+ * page inlinks
+ * @return modified (or a new) document instance, or null (meaning the
+ * document should be discarded)
* @throws IndexingException
*/
NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)