You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2017/01/18 15:46:17 UTC
[12/16] lucene-solr:jira/solr-8593: LUCENE-7619: don't let offsets go backwards
LUCENE-7619: don't let offsets go backwards
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0bdcfc29
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0bdcfc29
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0bdcfc29
Branch: refs/heads/jira/solr-8593
Commit: 0bdcfc291fceab26e1c62a7e9791ce417671eacd
Parents: 39eec66
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Jan 17 17:57:11 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Jan 17 17:57:11 2017 -0500
----------------------------------------------------------------------
.../miscellaneous/WordDelimiterGraphFilter.java | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0bdcfc29/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
index ea6f6cd..fe8ed72 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
@@ -195,6 +195,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
private int savedStartOffset;
private int savedEndOffset;
private AttributeSource.State savedState;
+ private int lastStartOffset;
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
@@ -373,12 +374,24 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
int endPart = bufferedParts[4*bufferedPos+3];
bufferedPos++;
+ int startOffset;
+ int endOffset;
+
if (hasIllegalOffsets) {
- offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+ startOffset = savedStartOffset;
+ endOffset = savedEndOffset;
} else {
- offsetAttribute.setOffset(savedStartOffset + startPart, savedStartOffset + endPart);
+ startOffset = savedStartOffset + startPart;
+ endOffset = savedStartOffset + endPart;
}
+ // never let offsets go backwards:
+ startOffset = Math.max(startOffset, lastStartOffset);
+ endOffset = Math.max(endOffset, lastStartOffset);
+
+ offsetAttribute.setOffset(startOffset, endOffset);
+ lastStartOffset = startOffset;
+
if (termPart == null) {
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
} else {
@@ -402,6 +415,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
super.reset();
accumPosInc = 0;
savedState = null;
+ lastStartOffset = 0;
concat.clear();
concatAll.clear();
}