You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/01/08 11:26:16 UTC

lucene-solr:master: TokenStreamToAutomaton failed to handle certain holes correctly

Repository: lucene-solr
Updated Branches:
  refs/heads/master 1aa9c4251 -> e64111c65


TokenStreamToAutomaton failed to handle certain holes correctly


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e64111c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e64111c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e64111c6

Branch: refs/heads/master
Commit: e64111c6545d8e05241bc87eb4623c5ed44db312
Parents: 1aa9c42
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Jan 8 06:26:08 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Jan 8 06:26:08 2017 -0500

----------------------------------------------------------------------
 .../apache/lucene/analysis/TokenStreamToAutomaton.java  | 11 ++++++++++-
 .../org/apache/lucene/analysis/TestGraphTokenizers.java | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e64111c6/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
index 071fa4a..64bac66 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
     final RollingBuffer<Position> positions = new Positions();
 
     int pos = -1;
+    int freedPos = 0;
     Position posData = null;
     int maxOffset = 0;
     while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
             addHoles(builder, positions, pos);
           }
         }
-        positions.freeBefore(pos);
+        while (freedPos <= pos) {
+          Position freePosData = positions.get(freedPos);
+          // don't free this position yet if we may still need to fill holes over it:
+          if (freePosData.arriving == -1 || freePosData.leaving == -1) {
+            break;
+          }
+          positions.freeBefore(freedPos);
+          freedPos++;
+        }
       }
 
       final int endPos = pos + posLengthAtt.getPositionLength();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e64111c6/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
index 78fb127..8899dd1 100644
--- a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
+++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
       Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
       Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
   }
+
+  public void testTokenStreamGraphWithHoles() throws Exception {  // regression test: the automaton built from this stream must contain a HOLE_A
+    final TokenStream ts = new CannedTokenStream(
+      new Token[] {
+        token("abc", 1, 1),
+        token("xyz", 1, 8),  // NOTE(review): presumably token(term, posInc, posLength), making "xyz" span 8 positions - confirm against the token() helper
+        token("def", 1, 1),
+        token("ghi", 1, 1),
+      });
+    assertSameLanguage(Operations.union(join(s2a("abc"), SEP_A, s2a("xyz")),  // expected path 1: "abc" followed by "xyz"
+                                        join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))), ts);  // expected path 2: "abc", a hole, then "def" and "ghi"
+  }
 }