You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2017/01/16 21:27:30 UTC

[05/14] lucene-solr:jira/solr-5944: also copy all attributes for ngram token filters

also copy all attributes for ngram token filters


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ea049b96
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ea049b96
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ea049b96

Branch: refs/heads/jira/solr-5944
Commit: ea049b96a24d6afc582ecdf406e8bf256b9911d9
Parents: 80e2854
Author: Nathan Gass <ga...@search.ch>
Authored: Fri Jan 13 17:01:34 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 17:07:23 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/NGramTokenFilter.java  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea049b96/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index e275cfa..cb5d447 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -23,9 +23,8 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource.State;
 
 /**
  * Tokenizes the input into n-grams of the given size(s).
@@ -52,14 +51,11 @@ public final class NGramTokenFilter extends TokenFilter {
   private int curCodePointCount;
   private int curGramSize;
   private int curPos;
-  private int curPosInc, curPosLen;
-  private int tokStart;
-  private int tokEnd;
+  private int curPosInc;
+  private State state;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt;
-  private final PositionLengthAttribute posLenAtt;
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   /**
    * Creates NGramTokenFilter with given min and max n-grams.
@@ -79,7 +75,6 @@ public final class NGramTokenFilter extends TokenFilter {
     this.maxGram = maxGram;
 
     posIncAtt = addAttribute(PositionIncrementAttribute.class);
-    posLenAtt = addAttribute(PositionLengthAttribute.class);
   }
 
   /**
@@ -104,9 +99,7 @@ public final class NGramTokenFilter extends TokenFilter {
           curGramSize = minGram;
           curPos = 0;
           curPosInc = posIncAtt.getPositionIncrement();
-          curPosLen = posLenAtt.getPositionLength();
-          tokStart = offsetAtt.startOffset();
-          tokEnd = offsetAtt.endOffset();
+          state = captureState();
         }
       }
 
@@ -115,14 +108,12 @@ public final class NGramTokenFilter extends TokenFilter {
         curGramSize = minGram;
       }
       if ((curPos + curGramSize) <= curCodePointCount) {
-        clearAttributes();
+        restoreState(state);
         final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
         final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
         termAtt.copyBuffer(curTermBuffer, start, end - start);
         posIncAtt.setPositionIncrement(curPosInc);
         curPosInc = 0;
-        posLenAtt.setPositionLength(curPosLen);
-        offsetAtt.setOffset(tokStart, tokEnd);
         curGramSize++;
         return true;
       }