You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/09/27 10:55:58 UTC
svn commit: r1526836 -
/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
Author: alexparvulescu
Date: Fri Sep 27 08:55:58 2013
New Revision: 1526836
URL: http://svn.apache.org/r1526836
Log:
OAK-1022 Add a custom Oak Lucene analyzer
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1526836&r1=1526835&r2=1526836&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Fri Sep 27 08:55:58 2013
@@ -16,24 +16,17 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
-import java.io.IOException;
import java.io.Reader;
-import org.apache.jackrabbit.oak.plugins.index.lucene.util.OakWordTokenFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.util.Version;
public class OakAnalyzer extends Analyzer {
- /** Default maximum allowed token length */
- public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
private final Version matchVersion;
/**
@@ -47,36 +40,16 @@ public class OakAnalyzer extends Analyze
this.matchVersion = matchVersion;
}
- /**
- * Set maximum allowed token length. If a token is seen that exceeds this
- * length then it is discarded. This setting only takes effect the next time
- * tokenStream or tokenStream is called.
- */
- public void setMaxTokenLength(int length) {
- maxTokenLength = length;
- }
-
- /**
- * @see #setMaxTokenLength
- */
- public int getMaxTokenLength() {
- return maxTokenLength;
- }
-
@Override
protected TokenStreamComponents createComponents(final String fieldName,
final Reader reader) {
- final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
- src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new StandardFilter(matchVersion, src);
- tok = new LowerCaseFilter(matchVersion, tok);
- tok = new OakWordTokenFilter(matchVersion, tok);
- return new TokenStreamComponents(src, tok) {
- @Override
- protected void setReader(final Reader reader) throws IOException {
- src.setMaxTokenLength(OakAnalyzer.this.maxTokenLength);
- super.setReader(reader);
- }
- };
+ WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader);
+ TokenStream tok = new LowerCaseFilter(matchVersion, src);
+ tok = new WordDelimiterFilter(tok,
+ WordDelimiterFilter.GENERATE_WORD_PARTS
+ | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE
+ | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null);
+
+ return new TokenStreamComponents(src, tok);
}
}