You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/03/01 09:27:21 UTC

[13/50] [abbrv] lucene-solr:jira/solr-9858: add javadocs explaining SynonymGraphFilter's ignoreCase

add javadocs explaining SynonymGraphFilter's ignoreCase


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3ad6e419
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3ad6e419
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3ad6e419

Branch: refs/heads/jira/solr-9858
Commit: 3ad6e41910158a46025ff78330d78a31a7081887
Parents: 8ed8ecf
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Feb 23 07:22:57 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Feb 23 07:23:24 2017 -0500

----------------------------------------------------------------------
 .../analysis/synonym/SynonymGraphFilter.java    |  8 +++++
 .../synonym/TestSynonymGraphFilter.java         | 34 ++++++++++++++++++++
 2 files changed, 42 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3ad6e419/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
index 788db0a..e59e61b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
@@ -160,6 +160,14 @@ public final class SynonymGraphFilter extends TokenFilter {
     }
   }
 
+  /**
+   * Apply previously built synonyms to incoming tokens.
+   * @param input input tokenstream
+   * @param synonyms synonym map
+   * @param ignoreCase if true, case-folds input tokens with {@link Character#toLowerCase(int)}
+   *                   before matching. Note: if you set this to true, it is your responsibility
+   *                   to lowercase the input entries when you create the {@link SynonymMap}.
+   */
   public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
     super(input);
     this.synonyms = synonyms;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3ad6e419/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
index e00a165..730d00a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java
@@ -23,6 +23,7 @@ import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -1922,4 +1923,37 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
         new int[]{1, 1, 0, 1, 1});
     a.close();
   }
+
+  public void testUpperCase() throws IOException {
+    assertMapping("word", "synonym");
+    assertMapping("word".toUpperCase(Locale.ROOT), "synonym");
+  }
+
+  private void assertMapping(String inputString, String outputString) throws IOException {
+    SynonymMap.Builder builder = new SynonymMap.Builder(false);
+    // the rules must be lowercased up front, but the incoming tokens will be matched case-insensitively:
+    CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder());
+    CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder());
+    builder.add(input, output, true);
+    Analyzer analyzer = new CustomAnalyzer(builder.build());
+    TokenStream tokenStream = analyzer.tokenStream("field", inputString);
+    assertTokenStreamContents(tokenStream, new String[]{
+        outputString, inputString
+      });
+  }
+
+  static class CustomAnalyzer extends Analyzer {
+    private SynonymMap synonymMap;
+
+    CustomAnalyzer(SynonymMap synonymMap) {
+      this.synonymMap = synonymMap;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(String s) {
+      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // Ignore case True
+      return new TokenStreamComponents(tokenizer, tokenStream);
+    }
+  }
 }