You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/09/27 10:55:58 UTC

svn commit: r1526836 - /jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java

Author: alexparvulescu
Date: Fri Sep 27 08:55:58 2013
New Revision: 1526836

URL: http://svn.apache.org/r1526836
Log:
OAK-1022 Add a custom Oak Lucene analyzer


Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1526836&r1=1526835&r2=1526836&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Fri Sep 27 08:55:58 2013
@@ -16,24 +16,17 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
-import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.jackrabbit.oak.plugins.index.lucene.util.OakWordTokenFilter;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 import org.apache.lucene.util.Version;
 
 public class OakAnalyzer extends Analyzer {
 
-    /** Default maximum allowed token length */
-    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
     private final Version matchVersion;
 
     /**
@@ -47,36 +40,16 @@ public class OakAnalyzer extends Analyze
         this.matchVersion = matchVersion;
     }
 
-    /**
-     * Set maximum allowed token length. If a token is seen that exceeds this
-     * length then it is discarded. This setting only takes effect the next time
-     * tokenStream or tokenStream is called.
-     */
-    public void setMaxTokenLength(int length) {
-        maxTokenLength = length;
-    }
-
-    /**
-     * @see #setMaxTokenLength
-     */
-    public int getMaxTokenLength() {
-        return maxTokenLength;
-    }
-
     @Override
     protected TokenStreamComponents createComponents(final String fieldName,
             final Reader reader) {
-        final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
-        src.setMaxTokenLength(maxTokenLength);
-        TokenStream tok = new StandardFilter(matchVersion, src);
-        tok = new LowerCaseFilter(matchVersion, tok);
-        tok = new OakWordTokenFilter(matchVersion, tok);
-        return new TokenStreamComponents(src, tok) {
-            @Override
-            protected void setReader(final Reader reader) throws IOException {
-                src.setMaxTokenLength(OakAnalyzer.this.maxTokenLength);
-                super.setReader(reader);
-            }
-        };
+        WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader);
+        TokenStream tok = new LowerCaseFilter(matchVersion, src);
+        tok = new WordDelimiterFilter(tok,
+                WordDelimiterFilter.GENERATE_WORD_PARTS
+                        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE
+                        | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null);
+
+        return new TokenStreamComponents(src, tok);
     }
 }