You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2017/01/16 10:17:22 UTC
[5/6] lucene-solr:master: also copy all attributes for ngram token filters
also copy all attributes for ngram token filters
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ea049b96
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ea049b96
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ea049b96
Branch: refs/heads/master
Commit: ea049b96a24d6afc582ecdf406e8bf256b9911d9
Parents: 80e2854
Author: Nathan Gass <ga...@search.ch>
Authored: Fri Jan 13 17:01:34 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 17:07:23 2017 +0100
----------------------------------------------------------------------
.../lucene/analysis/ngram/NGramTokenFilter.java | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea049b96/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index e275cfa..cb5d447 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -23,9 +23,8 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource.State;
/**
* Tokenizes the input into n-grams of the given size(s).
@@ -52,14 +51,11 @@ public final class NGramTokenFilter extends TokenFilter {
private int curCodePointCount;
private int curGramSize;
private int curPos;
- private int curPosInc, curPosLen;
- private int tokStart;
- private int tokEnd;
+ private int curPosInc;
+ private State state;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAtt;
- private final PositionLengthAttribute posLenAtt;
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates NGramTokenFilter with given min and max n-grams.
@@ -79,7 +75,6 @@ public final class NGramTokenFilter extends TokenFilter {
this.maxGram = maxGram;
posIncAtt = addAttribute(PositionIncrementAttribute.class);
- posLenAtt = addAttribute(PositionLengthAttribute.class);
}
/**
@@ -104,9 +99,7 @@ public final class NGramTokenFilter extends TokenFilter {
curGramSize = minGram;
curPos = 0;
curPosInc = posIncAtt.getPositionIncrement();
- curPosLen = posLenAtt.getPositionLength();
- tokStart = offsetAtt.startOffset();
- tokEnd = offsetAtt.endOffset();
+ state = captureState();
}
}
@@ -115,14 +108,12 @@ public final class NGramTokenFilter extends TokenFilter {
curGramSize = minGram;
}
if ((curPos + curGramSize) <= curCodePointCount) {
- clearAttributes();
+ restoreState(state);
final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
termAtt.copyBuffer(curTermBuffer, start, end - start);
posIncAtt.setPositionIncrement(curPosInc);
curPosInc = 0;
- posLenAtt.setPositionLength(curPosLen);
- offsetAtt.setOffset(tokStart, tokEnd);
curGramSize++;
return true;
}