You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2017/01/16 21:27:27 UTC
[02/14] lucene-solr:jira/solr-5944: copy all attributes including payload to new tokens
copy all attributes including payload to new tokens
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6570e6ec
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6570e6ec
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6570e6ec
Branch: refs/heads/jira/solr-5944
Commit: 6570e6ecc2b14a28da9873948083791ba47145d0
Parents: 61e4528
Author: Nathan Gass <ga...@search.ch>
Authored: Mon Jan 9 15:00:21 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 12:14:27 2017 +0100
----------------------------------------------------------------------
.../lucene/analysis/ngram/EdgeNGramTokenFilter.java | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6570e6ec/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 827e26f..303b7e320 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -22,9 +22,8 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
/**
* Tokenizes the given token into n-grams of given size(s).
@@ -43,15 +42,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
private int curTermLength;
private int curCodePointCount;
private int curGramSize;
- private int tokStart;
- private int tokEnd; // only used if the length changed before this filter
private int savePosIncr;
- private int savePosLen;
+ private AttributeSource attributes;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -86,17 +81,15 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
curTermLength = termAtt.length();
curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
curGramSize = minGram;
- tokStart = offsetAtt.startOffset();
- tokEnd = offsetAtt.endOffset();
+ attributes = input.cloneAttributes();
savePosIncr += posIncrAtt.getPositionIncrement();
- savePosLen = posLenAtt.getPositionLength();
}
}
if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
// grab gramSize chars from front or back
clearAttributes();
- offsetAtt.setOffset(tokStart, tokEnd);
+ attributes.copyTo(this);
// first ngram gets increment, others don't
if (curGramSize == minGram) {
posIncrAtt.setPositionIncrement(savePosIncr);
@@ -104,7 +97,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
} else {
posIncrAtt.setPositionIncrement(0);
}
- posLenAtt.setPositionLength(savePosLen);
final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
termAtt.copyBuffer(curTermBuffer, 0, charLength);
curGramSize++;