You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/05/22 19:59:18 UTC

[41/50] [abbrv] lucene-solr:jira/solr-11779: LUCENE-8273: TestRandomChains found some more end() handling problems

LUCENE-8273: TestRandomChains found some more end() handling problems


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0c0fce3e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0c0fce3e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0c0fce3e

Branch: refs/heads/jira/solr-11779
Commit: 0c0fce3e98c9a01c330329eca5153fb78c7decaf
Parents: 63e2139
Author: Alan Woodward <ro...@apache.org>
Authored: Mon May 21 15:12:32 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon May 21 15:12:32 2018 +0100

----------------------------------------------------------------------
 .../miscellaneous/ConditionalTokenFilter.java   | 16 ++--
 .../TestConditionalTokenFilter.java             | 77 ++++++++++++++++----
 2 files changed, 74 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0c0fce3e/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
index 6f9ea24..7de4fbd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
@@ -80,10 +80,10 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
     public void end() throws IOException {
       // imitate Tokenizer.end() call - endAttributes, set final offset
       if (exhausted) {
-        if (endCalled == false) {
+        if (endState == null) {
           input.end();
+          endState = captureState();
         }
-        endCalled = true;
         endOffset = offsetAtt.endOffset();
       }
       endAttributes();
@@ -96,7 +96,7 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
   private boolean lastTokenFiltered;
   private State bufferedState = null;
   private boolean exhausted;
-  private boolean endCalled;
+  private State endState = null;
   private int endOffset;
 
   private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
@@ -125,18 +125,22 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
     this.bufferedState = null;
     this.exhausted = false;
     this.endOffset = -1;
-    this.endCalled = false;
+    this.endState = null;
   }
 
   @Override
   public void end() throws IOException {
-    if (endCalled == false) {
+    if (endState == null) {
       super.end();
-      endCalled = true;
+      endState = captureState();
+    }
+    else {
+      restoreState(endState);
     }
     endOffset = getAttribute(OffsetAttribute.class).endOffset();
     if (lastTokenFiltered) {
       this.delegate.end();
+      endState = captureState();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0c0fce3e/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
index fed7f68..511c725 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
@@ -37,7 +37,10 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ValidatingTokenFilter;
 import org.apache.lucene.analysis.core.TypeTokenFilter;
+import org.apache.lucene.analysis.de.GermanStemFilter;
+import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.standard.ClassicTokenizer;
 import org.apache.lucene.analysis.synonym.SolrSynonymParser;
@@ -308,19 +311,7 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer source = new NGramTokenizer();
         TokenStream sink = new KeywordRepeatFilter(source);
-        sink = new ConditionalTokenFilter(sink, in -> new TypeTokenFilter(in, Collections.singleton("word"))) {
-          Random random = new Random(seed);
-          @Override
-          protected boolean shouldFilter() throws IOException {
-            return random.nextBoolean();
-          }
-
-          @Override
-          public void reset() throws IOException {
-            super.reset();
-            random = new Random(seed);
-          }
-        };
+        sink = new RandomSkippingFilter(sink, seed, in -> new TypeTokenFilter(in, Collections.singleton("word")));
         sink = new ValidatingTokenFilter(sink, "last stage");
         return new TokenStreamComponents(source, sink);
       }
@@ -330,4 +321,64 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
 
   }
 
+  public void testEndWithShingles() throws IOException {
+    TokenStream ts = whitespaceMockTokenizer("cyk jvboq \u092e\u0962\u093f");
+    ts = new GermanStemFilter(ts);
+    ts = new NonRandomSkippingFilter(ts, in -> new FixedShingleFilter(in, 2), true, false, true);
+    ts = new NonRandomSkippingFilter(ts, IndicNormalizationFilter::new, true);
+
+    assertTokenStreamContents(ts, new String[]{"jvboq"});
+  }
+
+  private static class RandomSkippingFilter extends ConditionalTokenFilter {
+
+    Random random;
+    final long seed;
+
+    protected RandomSkippingFilter(TokenStream input, long seed, Function<TokenStream, TokenStream> inputFactory) {
+      super(input, inputFactory);
+      this.seed = seed;
+      this.random = new Random(seed);
+    }
+
+    @Override
+    protected boolean shouldFilter() throws IOException {
+      return random.nextBoolean();
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      random = new Random(seed);
+    }
+  }
+
+  private static class NonRandomSkippingFilter extends ConditionalTokenFilter {
+
+    final boolean[] shouldFilters;
+    int pos;
+
+    /**
+     * Create a new NonRandomSkippingFilter
+     *
+     * @param input        the input TokenStream
+     * @param inputFactory a factory function to create a new instance of the TokenFilter to wrap
+     */
+    protected NonRandomSkippingFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory, boolean... shouldFilters) {
+      super(input, inputFactory);
+      this.shouldFilters = shouldFilters;
+    }
+
+    @Override
+    protected boolean shouldFilter() throws IOException {
+      return shouldFilters[pos++ % shouldFilters.length];
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      pos = 0;
+    }
+  }
+
 }