You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@nutch.apache.org by Sagar Vibhute <sa...@gmail.com> on 2007/10/11 05:30:39 UTC

How to add a field to results?

Hi,

I modified the plugin example given on the wiki to make a simple one.

I am trying to add the current time of day in HH:MM:SS format to the index.
My plugin is compiling as well.

However, when I click on "explain" in the search results, my field has not
been added.

Can someone please help me with this?

Here are the two Java files I am using:

TimestampIndexer.java

package org.apache.nutch.parse.timestamp;

// JDK import
import java.util.logging.Logger;
import java.util.Calendar;
//import java.util.Date;

// Commons imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


// Nutch imports
import org.apache.nutch.util.LogUtil;
import org.apache.nutch.fetcher.FetcherOutput;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.parse.Parse;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;

// Lucene imports
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;

/**
 * Indexing filter that adds a {@code timestamp} field to every document it
 * is given. The field holds the time of day at which the document passed
 * through this filter, formatted as zero-padded {@code HH:MM:SS} (24-hour
 * clock) in the JVM's default time zone.
 *
 * <p>NOTE(review): this class only has an effect if Nutch actually loads the
 * plugin -- presumably it must be declared in the plugin's plugin.xml and
 * enabled through the {@code plugin.includes} property, and the crawl must be
 * re-indexed afterwards. Confirm against the Nutch plugin documentation.
 */
public class TimestampIndexer implements IndexingFilter {

  /** Name of the index field that stores the time of day. */
  private static final String FIELD_NAME = "timestamp";

  private Configuration conf;

  public TimestampIndexer() {
  }

  /**
   * Adds the current time of day to {@code doc} as a stored, untokenized
   * field named {@code timestamp}.
   *
   * @param doc     the document being indexed; the field is added to it
   * @param parse   unused by this filter
   * @param url     unused by this filter
   * @param datum   unused by this filter
   * @param inlinks unused by this filter
   * @return the same {@code doc} instance, with the extra field added
   * @throws IndexingException declared by the interface; never thrown here
   */
  public Document filter(Document doc, Parse parse, Text url,
    CrawlDatum datum, Inlinks inlinks)
    throws IndexingException {

    // Keep the working values local: instance fields would be shared by
    // every call made on this filter object.
    Calendar now = Calendar.getInstance();

    // Zero-pad each component so the value is always HH:MM:SS
    // (e.g. "09:05:03" rather than "9:5:3").
    String time = twoDigits(now.get(Calendar.HOUR_OF_DAY))
        + ":" + twoDigits(now.get(Calendar.MINUTE))
        + ":" + twoDigits(now.get(Calendar.SECOND));

    Field timestampField =
        new Field(FIELD_NAME, time, Field.Store.YES, Field.Index.UN_TOKENIZED);
    doc.add(timestampField);

    return doc;
  }

  /** Renders a value in the range 0-59 as exactly two ASCII digits. */
  private static String twoDigits(int value) {
    return (value < 10 ? "0" : "") + value;
  }

  /** Stores the configuration handed to this filter. */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Returns the configuration previously passed to {@link #setConf}. */
  public Configuration getConf() {
    return this.conf;
  }
}

TimestampQueryFilter.java

package org.apache.nutch.parse.timestamp;

import org.apache.nutch.searcher.FieldQueryFilter;

import java.util.logging.Logger;

// Commons imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


/**
 * Query filter bound to the {@code timestamp} field. All behaviour comes
 * from {@link FieldQueryFilter}; this subclass only supplies the field name.
 */
public class TimestampQueryFilter extends FieldQueryFilter {

    /** Field name handed to the superclass constructor. */
    private static final String TIMESTAMP_FIELD = "timestamp";

    /** Creates a filter for the {@code timestamp} field. */
    public TimestampQueryFilter() {
        super(TIMESTAMP_FIELD);
    }

}

----------------------------

Thank you.

- Sagar