You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2007/03/02 18:54:28 UTC
svn commit: r513866 -
/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
Author: otis
Date: Fri Mar 2 09:54:27 2007
New Revision: 513866
URL: http://svn.apache.org/viewvc?view=rev&rev=513866
Log:
- 2-char indentation
Modified:
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?view=diff&rev=513866&r1=513865&r2=513866
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Fri Mar 2 09:54:27 2007
@@ -28,63 +28,63 @@
* @author Otis Gospodnetic
*/
public class NGramTokenizer extends Tokenizer {
- public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
- public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
+ public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
+ public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
- private int minGram, maxGram;
- private int gramSize;
- private int pos = 0;
- private int inLen;
- private String inStr;
- private boolean started = false;
+ private int minGram, maxGram;
+ private int gramSize;
+ private int pos = 0;
+ private int inLen;
+ private String inStr;
+ private boolean started = false;
- /**
- * Creates NGramTokenizer with given min and max n-grams.
- * @param input Reader holding the input to be tokenized
- * @param minGram the smallest n-gram to generate
- * @param maxGram the largest n-gram to generate
- */
- public NGramTokenizer(Reader input, int minGram, int maxGram) {
- super(input);
- if (minGram < 1) {
- throw new IllegalArgumentException("minGram must be greater than zero");
- }
- if (minGram > maxGram) {
- throw new IllegalArgumentException("minGram must not be greater than maxGram");
- }
- this.minGram = minGram;
- this.maxGram = maxGram;
+ /**
+ * Creates NGramTokenizer with given min and max n-grams.
+ * @param input Reader holding the input to be tokenized
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public NGramTokenizer(Reader input, int minGram, int maxGram) {
+ super(input);
+ if (minGram < 1) {
+ throw new IllegalArgumentException("minGram must be greater than zero");
}
- /**
- * Creates NGramTokenizer with default min and max n-grams.
- * @param input Reader holding the input to be tokenized
- */
- public NGramTokenizer(Reader input) {
- this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ if (minGram > maxGram) {
+ throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ }
+ /**
+ * Creates NGramTokenizer with default min and max n-grams.
+ * @param input Reader holding the input to be tokenized
+ */
+ public NGramTokenizer(Reader input) {
+ this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ }
- /** Returns the next token in the stream, or null at EOS. */
- public final Token next() throws IOException {
- if (!started) {
- started = true;
- gramSize = minGram;
- char[] chars = new char[1024];
- input.read(chars);
- inStr = new String(chars).trim(); // remove any trailing empty strings
- inLen = inStr.length();
- }
+ /** Returns the next token in the stream, or null at EOS. */
+ public final Token next() throws IOException {
+ if (!started) {
+ started = true;
+ gramSize = minGram;
+ char[] chars = new char[1024];
+ input.read(chars);
+ inStr = new String(chars).trim(); // remove any trailing empty strings
+ inLen = inStr.length();
+ }
- if (pos+gramSize > inLen) { // if we hit the end of the string
- pos = 0; // reset to beginning of string
- gramSize++; // increase n-gram size
- if (gramSize > maxGram) // we are done
- return null;
- if (pos+gramSize > inLen)
- return null;
- }
- String gram = inStr.substring(pos, pos+gramSize);
- int oldPos = pos;
- pos++;
- return new Token(gram, oldPos, oldPos+gramSize);
+ if (pos+gramSize > inLen) { // if we hit the end of the string
+ pos = 0; // reset to beginning of string
+ gramSize++; // increase n-gram size
+ if (gramSize > maxGram) // we are done
+ return null;
+ if (pos+gramSize > inLen)
+ return null;
}
+ String gram = inStr.substring(pos, pos+gramSize);
+ int oldPos = pos;
+ pos++;
+ return new Token(gram, oldPos, oldPos+gramSize);
+ }
}