You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/01/17 23:00:45 UTC
lucene-solr:branch_6x: LUCENE-7619: don't let offsets go backwards
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 471417361 -> 03ffb1287
LUCENE-7619: don't let offsets go backwards
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/03ffb128
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/03ffb128
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/03ffb128
Branch: refs/heads/branch_6x
Commit: 03ffb1287d9908f8e1bb1417b7f18ca4645f209f
Parents: 4714173
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Jan 17 17:57:11 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Jan 17 17:57:55 2017 -0500
----------------------------------------------------------------------
.../miscellaneous/WordDelimiterGraphFilter.java | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/03ffb128/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
index ea6f6cd..fe8ed72 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
@@ -195,6 +195,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
private int savedStartOffset;
private int savedEndOffset;
private AttributeSource.State savedState;
+ private int lastStartOffset;
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
@@ -373,12 +374,24 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
int endPart = bufferedParts[4*bufferedPos+3];
bufferedPos++;
+ int startOffset;
+ int endOffset;
+
if (hasIllegalOffsets) {
- offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+ startOffset = savedStartOffset;
+ endOffset = savedEndOffset;
} else {
- offsetAttribute.setOffset(savedStartOffset + startPart, savedStartOffset + endPart);
+ startOffset = savedStartOffset + startPart;
+ endOffset = savedStartOffset + endPart;
}
+ // never let offsets go backwards:
+ startOffset = Math.max(startOffset, lastStartOffset);
+ endOffset = Math.max(endOffset, lastStartOffset);
+
+ offsetAttribute.setOffset(startOffset, endOffset);
+ lastStartOffset = startOffset;
+
if (termPart == null) {
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
} else {
@@ -402,6 +415,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
super.reset();
accumPosInc = 0;
savedState = null;
+ lastStartOffset = 0;
concat.clear();
concatAll.clear();
}