You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2012/01/18 19:35:21 UTC
svn commit: r1232988 - /lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html

Author: sarowe
Date: Wed Jan 18 18:35:21 2012
New Revision: 1232988

URL: http://svn.apache.org/viewvc?rev=1232988&view=rev
Log:
LUCENE-3666: fix example consumer code to follow the TokenStream API contract; fix cast-to-interface code to use interface method instead of instance field.

Modified:
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html?rev=1232988&r1=1232987&r2=1232988&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/package.html Wed Jan 18 18:35:21 2012
@@ -180,10 +180,24 @@ and proximity searches (though sentence 
   However an application might invoke Analysis of any text for testing or for any other purpose, something like:
 </p>
 <PRE class="prettyprint">
-    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_XY); // or any other analyzer
-    TokenStream ts = analyzer.tokenStream("myfield",new StringReader("some text goes here"));
-    while (ts.incrementToken()) {
-      System.out.println("token: "+ts));
+    Version matchVersion = Version.LUCENE_XY; // Substitute desired Lucene version for XY
+    Analyzer analyzer = new StandardAnalyzer(matchVersion); // or any other analyzer
+    TokenStream ts = analyzer.tokenStream("myfield", new StringReader("some text goes here"));
+    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+    
+    try {
+      ts.reset(); // Resets this stream to the beginning. (Required)
+      while (ts.incrementToken()) {
+        // Use {@link org.apache.lucene.util.AttributeSource#reflectAsString(boolean)}
+        // for token stream debugging.
+        System.out.println("token: " + ts.reflectAsString(true));
+
+        System.out.println("token start offset: " + offsetAtt.startOffset());
+        System.out.println("  token end offset: " + offsetAtt.endOffset());
+      }
+      ts.end();   // Perform end-of-stream operations, e.g. set the final offset.
+    } finally {
+      ts.close(); // Release resources associated with this stream.
     }
 </PRE>
 <h2>Indexing Analysis vs. Search Analysis</h2>
@@ -458,15 +472,18 @@ public class MyAnalyzer extends Reusable
     // get the CharTermAttribute from the TokenStream
     CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
 
-    stream.reset();
+    try {
+      stream.reset();
     
-    // print all tokens until stream is exhausted
-    while (stream.incrementToken()) {
-      System.out.println(termAtt.toString());
-    }
+      // print all tokens until stream is exhausted
+      while (stream.incrementToken()) {
+        System.out.println(termAtt.toString());
+      }
     
-    stream.end()
-    stream.close();
+      stream.end();
+    } finally {
+      stream.close();
+    }
   }
 }
 </pre>
@@ -678,7 +695,7 @@ public final class PartOfSpeechAttribute
 
   {@literal @Override}
   public void copyTo(AttributeImpl target) {
-    ((PartOfSpeechAttribute) target).pos = pos;
+    ((PartOfSpeechAttribute) target).setPartOfSpeech(pos);
   }
 }
 </pre>
@@ -758,15 +775,18 @@ to make use of the new PartOfSpeechAttri
     // get the PartOfSpeechAttribute from the TokenStream
     PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
     
-    stream.reset();
+    try {
+      stream.reset();
 
-    // print all tokens until stream is exhausted
-    while (stream.incrementToken()) {
-      System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
-    }
+      // print all tokens until stream is exhausted
+      while (stream.incrementToken()) {
+        System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
+      }
     
-    stream.end();
-    stream.close();
+      stream.end();
+    } finally {
+      stream.close();
+    }
   }
 </pre>
 The change that was made is to get the PartOfSpeechAttribute from the TokenStream and print out its contents in