You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/01/08 11:26:16 UTC
lucene-solr:master: TokenStreamToAutomaton failed to handle certain holes correctly
Repository: lucene-solr
Updated Branches:
refs/heads/master 1aa9c4251 -> e64111c65
TokenStreamToAutomaton failed to handle certain holes correctly
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e64111c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e64111c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e64111c6
Branch: refs/heads/master
Commit: e64111c6545d8e05241bc87eb4623c5ed44db312
Parents: 1aa9c42
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Jan 8 06:26:08 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Jan 8 06:26:08 2017 -0500
----------------------------------------------------------------------
.../apache/lucene/analysis/TokenStreamToAutomaton.java | 11 ++++++++++-
.../org/apache/lucene/analysis/TestGraphTokenizers.java | 12 ++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e64111c6/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
index 071fa4a..64bac66 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
final RollingBuffer<Position> positions = new Positions();
int pos = -1;
+ int freedPos = 0;
Position posData = null;
int maxOffset = 0;
while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
addHoles(builder, positions, pos);
}
}
- positions.freeBefore(pos);
+ while (freedPos <= pos) {
+ Position freePosData = positions.get(freedPos);
+ // don't free this position yet if we may still need to fill holes over it:
+ if (freePosData.arriving == -1 || freePosData.leaving == -1) {
+ break;
+ }
+ positions.freeBefore(freedPos);
+ freedPos++;
+ }
}
final int endPos = pos + posLengthAtt.getPositionLength();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e64111c6/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
index 78fb127..8899dd1 100644
--- a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
+++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
}
+
+ public void testTokenStreamGraphWithHoles() throws Exception {
+ final TokenStream ts = new CannedTokenStream(
+ new Token[] {
+ token("abc", 1, 1),
+ token("xyz", 1, 8),
+ token("def", 1, 1),
+ token("ghi", 1, 1),
+ });
+ assertSameLanguage(Operations.union(join(s2a("abc"), SEP_A, s2a("xyz")),
+ join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))), ts);
+ }
}