You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2009/07/19 17:06:58 UTC
svn commit: r795553 -
/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
Author: gsingers
Date: Sun Jul 19 15:06:57 2009
New Revision: 795553
URL: http://svn.apache.org/viewvc?rev=795553&view=rev
Log:
Javadoc updates
Modified:
lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
Modified: lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java?rev=795553&r1=795552&r2=795553&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java Sun Jul 19 15:06:57 2009
@@ -90,9 +90,17 @@
/** @deprecated Please use {@link #TOKEN_TYPES} instead */
public static final String [] tokenImage = TOKEN_TYPES;
-
+ /**
+ * Only output tokens
+ */
public static final int TOKENS_ONLY = 0;
+ /**
+ * Only output untokenized tokens, which are tokens that would normally be split into several tokens
+ */
public static final int UNTOKENIZED_ONLY = 1;
+ /**
+ * Output both the untokenized token and the splits
+ */
public static final int BOTH = 2;
/**
* This flag is used to indicate that the produced "Token" would, if {@link #TOKENS_ONLY} was used, produce multiple tokens.
@@ -121,7 +129,14 @@
this(input, TOKENS_ONLY, Collections.EMPTY_SET);
}
-
+ /**
+ * Creates a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
+ * <code>input</code> to the newly created JFlex scanner.
+ *
+ * @param input The input
+ * @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
+ * @param untokenizedTypes
+ */
public WikipediaTokenizer(Reader input, int tokenOutput, Set untokenizedTypes) {
super(input);
this.tokenOutput = tokenOutput;