You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2012/01/18 19:35:21 UTC
svn commit: r1232988 -
/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html
Author: sarowe
Date: Wed Jan 18 18:35:21 2012
New Revision: 1232988
URL: http://svn.apache.org/viewvc?rev=1232988&view=rev
Log:
LUCENE-3666: fix example consumer code to follow the TokenStream API contract; fix cast-to-interface code to use interface method instead of instance field.
Modified:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html?rev=1232988&r1=1232987&r2=1232988&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html Wed Jan 18 18:35:21 2012
@@ -180,10 +180,24 @@ and proximity searches (though sentence
However, an application might invoke Analysis of any text for testing or for any other purpose, something like:
</p>
<PRE class="prettyprint">
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_XY); // or any other analyzer
- TokenStream ts = analyzer.tokenStream("myfield",new StringReader("some text goes here"));
- while (ts.incrementToken()) {
- System.out.println("token: "+ts));
+ Version matchVersion = Version.LUCENE_XY; // Substitute desired Lucene version for XY
+ Analyzer analyzer = new StandardAnalyzer(matchVersion); // or any other analyzer
+ TokenStream ts = analyzer.tokenStream("myfield", new StringReader("some text goes here"));
+ OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+
+ try {
+ ts.reset(); // Resets this stream to the beginning. (Required)
+ while (ts.incrementToken()) {
+ // Use {@link org.apache.lucene.util.AttributeSource#reflectAsString(boolean)}
+ // for token stream debugging.
+ System.out.println("token: " + ts.reflectAsString(true));
+
+ System.out.println("token start offset: " + offsetAtt.startOffset());
+ System.out.println(" token end offset: " + offsetAtt.endOffset());
+ }
+ ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
+ } finally {
+ ts.close(); // Release resources associated with this stream.
}
</PRE>
<h2>Indexing Analysis vs. Search Analysis</h2>
@@ -458,15 +472,18 @@ public class MyAnalyzer extends Reusable
// get the CharTermAttribute from the TokenStream
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
- stream.reset();
+ try {
+ stream.reset();
- // print all tokens until stream is exhausted
- while (stream.incrementToken()) {
- System.out.println(termAtt.toString());
- }
+ // print all tokens until stream is exhausted
+ while (stream.incrementToken()) {
+ System.out.println(termAtt.toString());
+ }
- stream.end()
- stream.close();
+ stream.end();
+ } finally {
+ stream.close();
+ }
}
}
</pre>
@@ -678,7 +695,7 @@ public final class PartOfSpeechAttribute
{@literal @Override}
public void copyTo(AttributeImpl target) {
- ((PartOfSpeechAttribute) target).pos = pos;
+ ((PartOfSpeechAttribute) target).setPartOfSpeech(pos);
}
}
</pre>
@@ -758,15 +775,18 @@ to make use of the new PartOfSpeechAttri
// get the PartOfSpeechAttribute from the TokenStream
PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
- stream.reset();
+ try {
+ stream.reset();
- // print all tokens until stream is exhausted
- while (stream.incrementToken()) {
- System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
- }
+ // print all tokens until stream is exhausted
+ while (stream.incrementToken()) {
+ System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
+ }
- stream.end();
- stream.close();
+ stream.end();
+ } finally {
+ stream.close();
+ }
}
</pre>
The change that was made is to get the PartOfSpeechAttribute from the TokenStream and print out its contents in