You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/08/17 01:16:32 UTC

svn commit: r986158 - /lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp

Author: rmuir
Date: Mon Aug 16 23:16:32 2010
New Revision: 986158

URL: http://svn.apache.org/viewvc?rev=986158&view=rev
Log:
SOLR-2051: analysis.jsp lost attributes, giving incorrect results for protected words, etc.

Modified:
    lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp

Modified: lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp?rev=986158&r1=986157&r2=986158&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp Mon Aug 16 23:16:32 2010
@@ -16,7 +16,9 @@
  limitations under the License.
 --%>
 <%@ page import="org.apache.lucene.analysis.Analyzer,
-                 org.apache.lucene.analysis.Token,
+                 org.apache.lucene.util.AttributeSource,
+                 org.apache.lucene.util.Attribute,
+                 org.apache.lucene.util.BytesRef,
                  org.apache.lucene.analysis.TokenStream,
                  org.apache.lucene.index.Payload,
                  org.apache.lucene.analysis.CharReader,
@@ -152,9 +154,9 @@
       Analyzer analyzer =  field.getType().getQueryAnalyzer();
       TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
       tstream.reset();
-      List<Token> tokens = getTokens(tstream);
+      List<AttributeSource> tokens = getTokens(tstream);
       matches = new HashSet<Tok>();
-      for (Token t : tokens) { matches.add( new Tok(t,0)); }
+      for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
     }
 
     if (val!="") {
@@ -199,7 +201,7 @@
        }
 
        TokenStream tstream = tfac.create(tchain.charStream(new StringReader(val)));
-       List<Token> tokens = getTokens(tstream);
+       List<AttributeSource> tokens = getTokens(tstream);
        if (verbose) {
          writeHeader(out, tfac.getClass(), tfac.getArgs());
        }
@@ -211,24 +213,16 @@
            writeHeader(out, filtfac.getClass(), filtfac.getArgs());
          }
 
-         final Iterator<Token> iter = tokens.iterator();
-         tstream = filtfac.create( new TokenStream() {
-           CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-           OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class);
-           TypeAttribute typeAtt = addAttribute (TypeAttribute.class);
-           FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class);
-           PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class);
-           PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class);
+         final Iterator<AttributeSource> iter = tokens.iterator();
+         tstream = filtfac.create( new TokenStream(tstream.getAttributeFactory()) {
            
            public boolean incrementToken() throws IOException {
              if (iter.hasNext()) {
-               Token token = iter.next();
-               termAtt.copyBuffer(token.buffer(), 0, token.length());
-               offsetAtt.setOffset(token.startOffset(), token.endOffset());
-               typeAtt.setType(token.type());
-               flagsAtt.setFlags(token.getFlags());
-               posIncAtt.setPositionIncrement(token.getPositionIncrement());
-               payloadAtt.setPayload(token.getPayload());
+               AttributeSource token = iter.next();
+               Iterator<Class<? extends Attribute>> atts = token.getAttributeClassesIterator();
+               while (atts.hasNext()) // make sure all att impls in the token exist here
+                 addAttribute(atts.next());
+               token.copyTo(this);
                return true;
              } else {
                return false;
@@ -244,7 +238,7 @@
      } else {
        TokenStream tstream = analyzer.reusableTokenStream(field.getName(),new StringReader(val));
        tstream.reset();
-       List<Token> tokens = getTokens(tstream);
+       List<AttributeSource> tokens = getTokens(tstream);
        if (verbose) {
          writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
        }
@@ -253,27 +247,14 @@
   }
 
 
-  static List<Token> getTokens(TokenStream tstream) throws IOException {
-    List<Token> tokens = new ArrayList<Token>();
-    CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
-    OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class);
-    TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class);
-    FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class);
-    PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class);
-    PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class);
+  static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
+    List<AttributeSource> tokens = new ArrayList<AttributeSource>();
    
     while (true) {
       if (!tstream.incrementToken())
         break;
       else {
-      	Token token = new Token();
-      	token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
-      	token.setType(typeAtt.type());
-      	token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
-      	token.setPayload(payloadAtt.getPayload());
-      	token.setFlags(flagsAtt.getFlags());
-      	token.setPositionIncrement(posIncAtt.getPositionIncrement());
-      	tokens.add(token);
+      	tokens.add(tstream.cloneAttributes());
       }
     }
     return tokens;
@@ -281,9 +262,9 @@
 
 
   private static class Tok {
-    Token token;
+    AttributeSource token;
     int pos;
-    Tok(Token token, int pos) {
+    Tok(AttributeSource token, int pos) {
       this.token=token;
       this.pos=pos;
     }
@@ -297,6 +278,16 @@
     public String toString() {
       return token.toString();
     }
+    public String toPrintableString() {
+      TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
+      if (att instanceof CharTermAttribute)
+        return att.toString();
+      else {
+        BytesRef bytes = new BytesRef();
+        att.toBytesRef(bytes);
+        return bytes.toString();
+      }
+    }
   }
 
   private static interface ToStr {
@@ -368,7 +359,7 @@
 
 
   // readable, raw, pos, type, start/end
-  static void writeTokens(JspWriter out, List<Token> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
+  static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
 
     // Use a map to tell what tokens are in what positions
     // because some tokenizers/filters may do funky stuff with
@@ -376,12 +367,12 @@
     HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
     boolean needRaw=false;
     int pos=0;
-    for (Token t : tokens) {
+    for (AttributeSource t : tokens) {
       if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
         needRaw=true;
       }
 
-      pos += t.getPositionIncrement();
+      pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
       List lst = map.get(pos);
       if (lst==null) {
         lst = new ArrayList(1);
@@ -426,7 +417,7 @@
 
     printRow(out,"term text", arr, new ToStr() {
       public String toStr(Object o) {
-        return ft.indexedToReadable( ((Tok)o).token.toString() );
+        return ft.indexedToReadable( ((Tok)o).toPrintableString() );
       }
     }
             ,true
@@ -438,7 +429,7 @@
       printRow(out,"raw text", arr, new ToStr() {
         public String toStr(Object o) {
           // page is UTF-8, so anything goes.
-          return ((Tok)o).token.toString();
+          return ((Tok)o).toPrintableString();
         }
       }
               ,true
@@ -450,7 +441,7 @@
     if (verbose) {
       printRow(out,"term type", arr, new ToStr() {
         public String toStr(Object o) {
-          String tt =  ((Tok)o).token.type();
+          String tt =  ((Tok)o).token.addAttribute(TypeAttribute.class).type();
           if (tt == null) {
              return "null";
           } else {
@@ -467,8 +458,8 @@
     if (verbose) {
       printRow(out,"source start,end", arr, new ToStr() {
         public String toStr(Object o) {
-          Token t = ((Tok)o).token;
-          return Integer.toString(t.startOffset()) + ',' + t.endOffset() ;
+          AttributeSource t = ((Tok)o).token;
+          return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
         }
       }
               ,true
@@ -480,8 +471,8 @@
     if (verbose) {
       printRow(out,"payload", arr, new ToStr() {
         public String toStr(Object o) {
-          Token t = ((Tok)o).token;
-          Payload p = t.getPayload();
+          AttributeSource t = ((Tok)o).token;
+          Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
           if( null != p ) {
             BigInteger bi = new BigInteger( p.getData() );
             String ret = bi.toString( 16 );