You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/01/17 23:00:45 UTC

lucene-solr:branch_6x: LUCENE-7619: don't let offsets go backwards

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 471417361 -> 03ffb1287


LUCENE-7619: don't let offsets go backwards


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/03ffb128
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/03ffb128
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/03ffb128

Branch: refs/heads/branch_6x
Commit: 03ffb1287d9908f8e1bb1417b7f18ca4645f209f
Parents: 4714173
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Jan 17 17:57:11 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Jan 17 17:57:55 2017 -0500

----------------------------------------------------------------------
 .../miscellaneous/WordDelimiterGraphFilter.java   | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/03ffb128/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
index ea6f6cd..fe8ed72 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java
@@ -195,6 +195,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
   private int savedStartOffset;
   private int savedEndOffset;
   private AttributeSource.State savedState;
+  private int lastStartOffset;
   
   // if length by start + end offsets doesn't match the term text then assume
   // this is a synonym and don't adjust the offsets.
@@ -373,12 +374,24 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
         int endPart = bufferedParts[4*bufferedPos+3];
         bufferedPos++;
 
+        int startOffset;
+        int endOffset;
+
         if (hasIllegalOffsets) {
-          offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+          startOffset = savedStartOffset;
+          endOffset = savedEndOffset;
         } else {
-          offsetAttribute.setOffset(savedStartOffset + startPart, savedStartOffset + endPart);
+          startOffset = savedStartOffset + startPart;
+          endOffset = savedStartOffset + endPart;
         }
 
+        // never let offsets go backwards:
+        startOffset = Math.max(startOffset, lastStartOffset);
+        endOffset = Math.max(endOffset, lastStartOffset);
+
+        offsetAttribute.setOffset(startOffset, endOffset);
+        lastStartOffset = startOffset;
+
         if (termPart == null) {
           termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
         } else {
@@ -402,6 +415,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
     super.reset();
     accumPosInc = 0;
     savedState = null;
+    lastStartOffset = 0;
     concat.clear();
     concatAll.clear();
   }