You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/01/08 11:27:08 UTC
lucene-solr:branch_6x: TokenStreamToAutomaton failed to handle certain holes correctly
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 373826a69 -> 2336152fb
TokenStreamToAutomaton failed to handle certain holes correctly
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2336152f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2336152f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2336152f
Branch: refs/heads/branch_6x
Commit: 2336152fb4acf20bfc4936ad5e2cddde8efebaf1
Parents: 373826a
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Jan 8 06:26:08 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Jan 8 06:26:27 2017 -0500
----------------------------------------------------------------------
.../apache/lucene/analysis/TokenStreamToAutomaton.java | 11 ++++++++++-
.../org/apache/lucene/analysis/TestGraphTokenizers.java | 12 ++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2336152f/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
index 071fa4a..64bac66 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
final RollingBuffer<Position> positions = new Positions();
int pos = -1;
+ int freedPos = 0;
Position posData = null;
int maxOffset = 0;
while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
addHoles(builder, positions, pos);
}
}
- positions.freeBefore(pos);
+ while (freedPos <= pos) {
+ Position freePosData = positions.get(freedPos);
+ // don't free this position yet if we may still need to fill holes over it:
+ if (freePosData.arriving == -1 || freePosData.leaving == -1) {
+ break;
+ }
+ positions.freeBefore(freedPos);
+ freedPos++;
+ }
}
final int endPos = pos + posLengthAtt.getPositionLength();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2336152f/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
index 78fb127..8899dd1 100644
--- a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
+++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
}
+
+ public void testTokenStreamGraphWithHoles() throws Exception {
+ final TokenStream ts = new CannedTokenStream(
+ new Token[] {
+ token("abc", 1, 1),
+ token("xyz", 1, 8),
+ token("def", 1, 1),
+ token("ghi", 1, 1),
+ });
+ assertSameLanguage(Operations.union(join(s2a("abc"), SEP_A, s2a("xyz")),
+ join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))), ts);
+ }
}