You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Stephanie Belton <st...@zizou.net> on 2006/12/01 23:53:02 UTC

RE: Using dismax to find multiple terms across multiple fields

Thank you for your message, Yonik; that was very helpful. I didn't have much luck with the SnowballPorterFilterFactory, so I wrote my own factory last night and, as you said, it gives me much more flexibility. Here it is for anyone who's interested:

package myApp;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.solr.analysis.BaseTokenFilterFactory;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.apache.lucene.analysis.ru.RussianCharsets;


import java.io.Reader;

/**
 * Token filter factory that wraps a token stream in a {@link RussianStemFilter}.
 *
 * <p>The optional {@code charset} factory argument picks the character table handed
 * to the filter: {@code "KOI8"} or {@code "CP1251"} (exact, case-sensitive match).
 * When the argument is absent or holds any other value, the
 * {@code RussianCharsets.UnicodeRussian} table is used.
 */
public class RussianStemFilterFactory extends BaseTokenFilterFactory {

   /**
    * Wraps {@code input} in a Russian stem filter.
    *
    * @param input the token stream to be stemmed
    * @return a {@link RussianStemFilter} over {@code input}, built with the charset
    *         table selected by the {@code charset} argument
    */
   public TokenStream create(TokenStream input) {
      String requested = getArgs().get("charset");
      return new RussianStemFilter(input, pickCharset(requested));
   }

   /**
    * Maps a configured charset name onto the matching {@code RussianCharsets} table.
    *
    * @param name the configured name; may be {@code null}
    * @return the KOI8 or CP1251 table for those exact names, otherwise the Unicode table
    */
   private static char[] pickCharset(String name) {
      // Constant-first equals is null-safe, so no separate null guard is needed.
      if ("KOI8".equals(name)) {
         return RussianCharsets.KOI8;
      } else if ("CP1251".equals(name)) {
         return RussianCharsets.CP1251;
      }
      return RussianCharsets.UnicodeRussian;
   }
}