You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/02/23 12:23:13 UTC

lucene-solr:branch_6x: add javadocs explaining SynonymGraphFilter's ignoreCase

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x e339954ce -> 52b70c225


add javadocs explaining SynonymGraphFilter's ignoreCase


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/52b70c22
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/52b70c22
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/52b70c22

Branch: refs/heads/branch_6x
Commit: 52b70c2255a419a25d1c922749977366d3eb550c
Parents: e339954
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Feb 23 07:22:57 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Feb 23 07:22:57 2017 -0500

----------------------------------------------------------------------
 .../analysis/synonym/SynonymGraphFilter.java    |  8 +++++
 .../synonym/TestSynonymGraphFilter.java         | 34 ++++++++++++++++++++
 2 files changed, 42 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52b70c22/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
index 788db0a..e59e61b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
@@ -160,6 +160,14 @@ public final class SynonymGraphFilter extends TokenFilter {
     }
   }
 
+  /**
+   * Apply previously built synonyms to incoming tokens.
+   * @param input input tokenstream
+   * @param synonyms synonym map
+   * @param ignoreCase if true, case-folds input tokens with {@link Character#toLowerCase(int)}
+   *                   before matching. Note that if you set this to true, it is your responsibility
+   *                   to lowercase the input entries when you create the {@link SynonymMap}.
+   */
   public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
     super(input);
     this.synonyms = synonyms;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/52b70c22/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
index e00a165..730d00a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
@@ -23,6 +23,7 @@ import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -1922,4 +1923,37 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
         new int[]{1, 1, 0, 1, 1});
     a.close();
   }
+
+  public void testUpperCase() throws IOException {
+    assertMapping("word", "synonym");
+    assertMapping("word".toUpperCase(Locale.ROOT), "synonym");
+  }
+
+  private void assertMapping(String inputString, String outputString) throws IOException {
+    SynonymMap.Builder builder = new SynonymMap.Builder(false);
+    // the rules must be lowercased up front, but incoming tokens are matched case-insensitively:
+    CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder());
+    CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder());
+    builder.add(input, output, true);
+    Analyzer analyzer = new CustomAnalyzer(builder.build());
+    TokenStream tokenStream = analyzer.tokenStream("field", inputString);
+    assertTokenStreamContents(tokenStream, new String[]{
+        outputString, inputString
+      });
+  }
+
+  static class CustomAnalyzer extends Analyzer {
+    private SynonymMap synonymMap;
+
+    CustomAnalyzer(SynonymMap synonymMap) {
+      this.synonymMap = synonymMap;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(String s) {
+      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // Ignore case True
+      return new TokenStreamComponents(tokenizer, tokenStream);
+    }
+  }
 }