You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/01/19 23:41:17 UTC
svn commit: r1061039 - in /lucene/dev/trunk: ./ lucene/
lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/
lucene/src/java/org/apache/lucene/analysis/
lucene/src/java/org/apache/lucene/analysis/tokenattributes/ lucene/sr...
Author: uschindler
Date: Wed Jan 19 22:41:16 2011
New Revision: 1061039
URL: http://svn.apache.org/viewvc?rev=1061039&view=rev
Log:
LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the contents of AttributeImpl and AttributeSource using a well-defined API
Added:
lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/
- copied from r1060784, lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/
lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java
- copied unchanged from r1060784, lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeReflector.java
- copied unchanged from r1060784, lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/AttributeReflector.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (with props)
Removed:
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeSource.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
lucene/dev/trunk/solr/ (props changed)
lucene/dev/trunk/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jan 19 22:41:16 2011
@@ -362,9 +362,9 @@ Changes in backwards compatibility polic
* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
now implements CharSequence. This requires the toString() methods of
CharTermAttribute, deprecated TermAttribute, and Token to return only
- the term text and no other attribute contents.
- TODO: Point to new attribute inspection API coming with LUCENE-2374.
- (Uwe Schindler, Robert Muir)
+ the term text and no other attribute contents. LUCENE-2374 implements
+ an attribute reflection API to no longer rely on toString() for attribute
+ inspection. (Uwe Schindler, Robert Muir)
* LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed
@@ -592,6 +592,23 @@ API Changes
to ensure that the norm is encoded with your Similarity.
(Robert Muir, Mike McCandless)
+* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the
+ contents of AttributeImpl and AttributeSource using a well-defined API.
+ This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes
+ in a structured way.
+ There are also some backwards incompatible changes in toString() output,
+ as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute
+ leading to changed toString() return values. The new API allows getting a
+ string representation in a well-defined way using a new method
+ reflectAsString(). For backwards compatibility reasons, when toString()
+ was implemented by implementation subclasses, the default implementation of
+ AttributeImpl.reflectWith() uses toString()'s output instead to report the
+ Attribute's properties. Otherwise, reflectWith() uses Java's reflection
+ (like toString() did before) to get the attribute properties.
+ In addition, the mandatory equals() and hashCode() are no longer required
+ for AttributeImpls, but can still be provided (if needed).
+ (Uwe Schindler)
+
Bug fixes
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Wed Jan 19 22:41:16 2011
@@ -328,3 +328,10 @@ LUCENE-1458, LUCENE-2111: Flexible Index
* LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong
are final. If you subclassed this code before to encode variable-length
integers in some specialized way, use the Codec API instead.
+
+* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct
+ reflection of AttributeImpl instances, where the reflection was done using deprecated
+ toString() parsing, you now have to override reflectWith() to customize output.
+ toString() is no longer implemented by AttributeImpl, so if you have overridden
+ toString(), port your customization over to reflectWith(). reflectAsString() would
+ then return what toString() did before.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Wed Jan 19 22:41:16 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs
@@ -168,17 +169,18 @@ public final class NumericTokenStream ex
// this attribute has no contents to clear!
// we keep it untouched as it's fully controlled by outer class.
}
-
- @Override
- public boolean equals(Object other) {
- return other == this;
- }
-
+
@Override
- public int hashCode() {
- return System.identityHashCode(this);
+ public void reflectWith(AttributeReflector reflector) {
+ final BytesRef bytes = new BytesRef();
+ toBytesRef(bytes);
+ reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+ reflector.reflect(NumericTermAttribute.class, "shift", shift);
+ reflector.reflect(NumericTermAttribute.class, "rawValue", rawValue);
+ reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
+ reflector.reflect(NumericTermAttribute.class, "precisionStep", precisionStep);
}
-
+
@Override
public void copyTo(AttributeImpl target) {
final NumericTermAttribute a = (NumericTermAttribute) target;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java Wed Jan 19 22:41:16 2011
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPo
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
/**
A Token is an occurrence of a term from the text of a field. It consists of
@@ -588,6 +589,17 @@ public class Token extends CharTermAttri
}
}
+ @Override
+ public void reflectWith(AttributeReflector reflector) {
+ super.reflectWith(reflector);
+ reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
+ reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
+ reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
+ reflector.reflect(PayloadAttribute.class, "payload", payload);
+ reflector.reflect(FlagsAttribute.class, "flags", flags);
+ reflector.reflect(TypeAttribute.class, "type", type);
+ }
+
/** Convenience factory that returns <code>Token</code> as implementation for the basic
* attributes and return the default impl (with "Impl" appended) for all other
* attributes.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -23,6 +23,7 @@ import java.nio.CharBuffer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
@@ -244,6 +245,14 @@ public class CharTermAttributeImpl exten
}
@Override
+ public void reflectWith(AttributeReflector reflector) {
+ reflector.reflect(CharTermAttribute.class, "term", toString());
+ final BytesRef bytes = new BytesRef();
+ toBytesRef(bytes);
+ reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+ }
+
+ @Override
public void copyTo(AttributeImpl target) {
CharTermAttribute t = (CharTermAttribute) target;
t.copyBuffer(termBuffer, 0, termLength);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -37,20 +37,6 @@ public final class BoostAttributeImpl ex
public void clear() {
boost = 1.0f;
}
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof BoostAttributeImpl)
- return ((BoostAttributeImpl) other).boost == boost;
- return false;
- }
-
- @Override
- public int hashCode() {
- return Float.floatToIntBits(boost);
- }
@Override
public void copyTo(AttributeImpl target) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -48,25 +48,6 @@ public final class MaxNonCompetitiveBoos
maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
competitiveTerm = null;
}
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
- final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
- return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
- && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
- if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
- return hash;
- }
@Override
public void copyTo(AttributeImpl target) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -20,6 +20,8 @@ package org.apache.lucene.util;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
+import java.lang.ref.WeakReference;
+import java.util.LinkedList;
/**
* Base class for Attributes that can be added to a
@@ -37,72 +39,80 @@ public abstract class AttributeImpl impl
public abstract void clear();
/**
- * The default implementation of this method accesses all declared
- * fields of this object and prints the values in the following syntax:
+ * This method returns the current attribute values as a string in the following format
+ * by calling the {@link #reflectWith(AttributeReflector)} method:
*
+ * <ul>
+ * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
+ * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
+ * </ul>
+ *
+ * @see #reflectWith(AttributeReflector)
+ */
+ public final String reflectAsString(final boolean prependAttClass) {
+ final StringBuilder buffer = new StringBuilder();
+ reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ if (buffer.length() > 0) {
+ buffer.append(',');
+ }
+ if (prependAttClass) {
+ buffer.append(attClass.getName()).append('#');
+ }
+ buffer.append(key).append('=').append((value == null) ? "null" : value);
+ }
+ });
+ return buffer.toString();
+ }
+
+ /**
+ * This method is for introspection of attributes, it should simply
+ * add the key/values this attribute holds to the given {@link AttributeReflector}.
+ *
+ * <p>The default implementation calls {@link AttributeReflector#reflect} for all
+ * non-static fields from the implementing class, using the field name as key
+ * and the field value as value. The Attribute class is also determined by reflection.
+ * Please note that the default implementation can only handle single-Attribute
+ * implementations.
+ *
+ * <p>Custom implementations look like this (e.g. for a combined attribute implementation):
* <pre>
- * public String toString() {
- * return "start=" + startOffset + ",end=" + endOffset;
+ * public void reflectWith(AttributeReflector reflector) {
+ * reflector.reflect(CharTermAttribute.class, "term", term());
+ * reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
* }
* </pre>
- *
- * This method may be overridden by subclasses.
- */
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- Class<?> clazz = this.getClass();
- Field[] fields = clazz.getDeclaredFields();
+ *
+ * <p>If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
+ * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
+ * different values. So don't automatically exclude e.g. {@code null} properties!
+ *
+ * @see #reflectAsString(boolean)
+ */
+ public void reflectWith(AttributeReflector reflector) {
+ final Class<? extends AttributeImpl> clazz = this.getClass();
+ final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
+ if (interfaces.size() != 1) {
+ throw new UnsupportedOperationException(clazz.getName() +
+ " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
+ }
+ final Class<? extends Attribute> interf = interfaces.getFirst().get();
+ final Field[] fields = clazz.getDeclaredFields();
try {
for (int i = 0; i < fields.length; i++) {
- Field f = fields[i];
+ final Field f = fields[i];
if (Modifier.isStatic(f.getModifiers())) continue;
f.setAccessible(true);
- Object value = f.get(this);
- if (buffer.length()>0) {
- buffer.append(',');
- }
- if (value == null) {
- buffer.append(f.getName() + "=null");
- } else {
- buffer.append(f.getName() + "=" + value);
- }
+ reflector.reflect(interf, f.getName(), f.get(this));
}
} catch (IllegalAccessException e) {
// this should never happen, because we're just accessing fields
// from 'this'
throw new RuntimeException(e);
}
-
- return buffer.toString();
}
/**
- * Subclasses must implement this method and should compute
- * a hashCode similar to this:
- * <pre>
- * public int hashCode() {
- * int code = startOffset;
- * code = code * 31 + endOffset;
- * return code;
- * }
- * </pre>
- *
- * see also {@link #equals(Object)}
- */
- @Override
- public abstract int hashCode();
-
- /**
- * All values used for computation of {@link #hashCode()}
- * should be checked here for equality.
- *
- * see also {@link Object#equals(Object)}
- */
- @Override
- public abstract boolean equals(Object other);
-
- /**
* Copies the values from this Attribute into the passed-in
* target attribute. The target implementation must support all the
* Attributes this implementation supports.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeSource.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeSource.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeSource.java Wed Jan 19 22:41:16 2011
@@ -180,20 +180,9 @@ public class AttributeSource {
private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>();
- /** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
- * <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
- * the <code>AttributeSource</code>, because the provided attributes may already exist.
- * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
- * with this method and cast to your class.
- * The recommended way to use custom implementations is using an {@link AttributeFactory}.
- * </font></p>
- */
- public void addAttributeImpl(final AttributeImpl att) {
- final Class<? extends AttributeImpl> clazz = att.getClass();
- if (attributeImpls.containsKey(clazz)) return;
- LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces;
+ static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
synchronized(knownImplClasses) {
- foundInterfaces = knownImplClasses.get(clazz);
+ LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
if (foundInterfaces == null) {
// we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
// so all WeakReferences are never evicted by GC
@@ -210,7 +199,23 @@ public class AttributeSource {
actClazz = actClazz.getSuperclass();
} while (actClazz != null);
}
+ return foundInterfaces;
}
+ }
+
+ /** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
+ * <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
+ * the <code>AttributeSource</code>, because the provided attributes may already exist.
+ * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
+ * with this method and cast to your class.
+ * The recommended way to use custom implementations is using an {@link AttributeFactory}.
+ * </font></p>
+ */
+ public final void addAttributeImpl(final AttributeImpl att) {
+ final Class<? extends AttributeImpl> clazz = att.getClass();
+ if (attributeImpls.containsKey(clazz)) return;
+ final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
+ getAttributeInterfaces(clazz);
// add all interfaces of this AttributeImpl to the maps
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
@@ -233,7 +238,7 @@ public class AttributeSource {
* already in this AttributeSource and returns it. Otherwise a
* new instance is created, added to this AttributeSource and returned.
*/
- public <A extends Attribute> A addAttribute(Class<A> attClass) {
+ public final <A extends Attribute> A addAttribute(Class<A> attClass) {
AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) {
if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
@@ -248,7 +253,7 @@ public class AttributeSource {
}
/** Returns true, iff this AttributeSource has any attributes */
- public boolean hasAttributes() {
+ public final boolean hasAttributes() {
return !this.attributes.isEmpty();
}
@@ -256,7 +261,7 @@ public class AttributeSource {
* The caller must pass in a Class<? extends Attribute> value.
* Returns true, iff this AttributeSource contains the passed-in Attribute.
*/
- public boolean hasAttribute(Class<? extends Attribute> attClass) {
+ public final boolean hasAttribute(Class<? extends Attribute> attClass) {
return this.attributes.containsKey(attClass);
}
@@ -271,7 +276,7 @@ public class AttributeSource {
* available. If you want to only use the attribute, if it is available (to optimize
* consuming), use {@link #hasAttribute}.
*/
- public <A extends Attribute> A getAttribute(Class<A> attClass) {
+ public final <A extends Attribute> A getAttribute(Class<A> attClass) {
AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) {
throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
@@ -319,7 +324,7 @@ public class AttributeSource {
* Resets all Attributes in this AttributeSource by calling
* {@link AttributeImpl#clear()} on each Attribute implementation.
*/
- public void clearAttributes() {
+ public final void clearAttributes() {
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
@@ -334,7 +339,7 @@ public class AttributeSource {
* Captures the state of all Attributes. The return value can be passed to
* {@link #restoreState} to restore the state of this or another AttributeSource.
*/
- public State captureState() {
+ public final State captureState() {
if (!hasAttributes()) {
return null;
}
@@ -360,7 +365,7 @@ public class AttributeSource {
* reset its value to the default, in which case the caller should first
* call {@link TokenStream#clearAttributes()} on the targetStream.
*/
- public void restoreState(State state) {
+ public final void restoreState(State state) {
if (state == null) return;
do {
@@ -431,21 +436,53 @@ public class AttributeSource {
return false;
}
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder().append('(');
+ /**
+ * This method returns the current attribute values as a string in the following format
+ * by calling the {@link #reflectWith(AttributeReflector)} method:
+ *
+ * <ul>
+ * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
+ * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
+ * </ul>
+ *
+ * @see #reflectWith(AttributeReflector)
+ */
+ public final String reflectAsString(final boolean prependAttClass) {
+ final StringBuilder buffer = new StringBuilder();
+ reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ if (buffer.length() > 0) {
+ buffer.append(',');
+ }
+ if (prependAttClass) {
+ buffer.append(attClass.getName()).append('#');
+ }
+ buffer.append(key).append('=').append((value == null) ? "null" : value);
+ }
+ });
+ return buffer.toString();
+ }
+
+ /**
+ * This method is for introspection of attributes, it should simply
+ * add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
+ *
+ * <p>This method iterates over all Attribute implementations and calls the
+ * corresponding {@link AttributeImpl#reflectWith} method.</p>
+ *
+ * @see AttributeImpl#reflectWith
+ */
+ public final void reflectWith(AttributeReflector reflector) {
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
}
for (State state = currentState; state != null; state = state.next) {
- if (state != currentState) sb.append(',');
- sb.append(state.attribute.toString());
+ state.attribute.reflectWith(reflector);
}
}
- return sb.append(')').toString();
}
-
+
/**
* Performs a clone of all {@link AttributeImpl} instances returned in a new
* {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream
@@ -453,7 +490,7 @@ public class AttributeSource {
* You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
* into / modify the captured state.
*/
- public AttributeSource cloneAttributes() {
+ public final AttributeSource cloneAttributes() {
final AttributeSource clone = new AttributeSource(this.factory);
if (hasAttributes()) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java Wed Jan 19 22:41:16 2011
@@ -22,8 +22,11 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
import java.io.StringReader;
+import java.util.HashMap;
public class TestToken extends LuceneTestCase {
@@ -241,6 +244,22 @@ public class TestToken extends LuceneTes
ts.addAttribute(TypeAttribute.class) instanceof Token);
}
+ public void testAttributeReflection() throws Exception {
+ Token t = new Token("foobar", 6, 22, 8);
+ _TestUtil.assertAttributeReflection(t,
+ new HashMap<String,Object>() {{
+ put(CharTermAttribute.class.getName() + "#term", "foobar");
+ put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
+ put(OffsetAttribute.class.getName() + "#startOffset", 6);
+ put(OffsetAttribute.class.getName() + "#endOffset", 22);
+ put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
+ put(PayloadAttribute.class.getName() + "#payload", null);
+ put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
+ put(FlagsAttribute.class.getName() + "#flags", 8);
+ }});
+ }
+
+
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -19,7 +19,10 @@ package org.apache.lucene.analysis.token
import org.apache.lucene.analysis.TestToken;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
import java.nio.CharBuffer;
+import java.util.HashMap;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;
@@ -126,6 +129,15 @@ public class TestCharTermAttributeImpl e
assertNotSame(buf, copy.buffer());
}
+ public void testAttributeReflection() throws Exception {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ t.append("foobar");
+ _TestUtil.assertAttributeReflection(t, new HashMap<String,Object>() {{
+ put(CharTermAttribute.class.getName() + "#term", "foobar");
+ put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
+ }});
+ }
+
public void testCharSequenceInterface() {
final String s = "0123456789";
final CharTermAttributeImpl t = new CharTermAttributeImpl();
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java?rev=1061039&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.util.Collections;
+import java.util.HashMap;
+
+public class TestSimpleAttributeImpl extends LuceneTestCase {
+
+ // this checks using reflection API if the defaults are correct
+ public void testAttributes() {
+ _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(),
+ Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1));
+ _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(),
+ Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0));
+ _TestUtil.assertAttributeReflection(new TypeAttributeImpl(),
+ Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE));
+ _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(),
+ Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null));
+ _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(),
+ Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false));
+ _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap<String,Object>() {{
+ put(OffsetAttribute.class.getName()+"#startOffset", 0);
+ put(OffsetAttribute.class.getName()+"#endOffset", 0);
+ }});
+ }
+
+}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java Wed Jan 19 22:41:16 2011
@@ -109,34 +109,6 @@ public class TestAttributeSource extends
assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
}
- public void testToStringAndMultiAttributeImplementations() {
- AttributeSource src = new AttributeSource();
- CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class);
- TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class);
- termAtt.append("TestTerm");
- typeAtt.setType("TestType");
- assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
- Iterator<AttributeImpl> it = src.getAttributeImplsIterator();
- assertTrue("Iterator should have 2 attributes left", it.hasNext());
- assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next());
- assertTrue("Iterator should have 1 attributes left", it.hasNext());
- assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
- assertFalse("Iterator should have 0 attributes left", it.hasNext());
-
- src = new AttributeSource();
- src.addAttributeImpl(new Token());
- // this should not add a new attribute as Token implements CharTermAttribute, too
- termAtt = src.addAttribute(CharTermAttribute.class);
- assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token);
- // get the Token attribute and check, that it is the only one
- it = src.getAttributeImplsIterator();
- Token tok = (Token) it.next();
- assertFalse("There should be only one attribute implementation instance", it.hasNext());
-
- termAtt.setEmpty().append("TestTerm");
- assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString());
- }
-
public void testDefaultAttributeFactory() throws Exception {
AttributeSource src = new AttributeSource();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/_TestUtil.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/_TestUtil.java Wed Jan 19 22:41:16 2011
@@ -22,6 +22,10 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Assert;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
@@ -238,4 +242,17 @@ public class _TestUtil {
((ConcurrentMergeScheduler) ms).setMaxMergeCount(3);
}
}
+
+ /** Checks some basic behaviour of an AttributeImpl
+ * @param reflectedValues contains a map with "AttributeClass#key" as values
+ */
+ public static <T> void assertAttributeReflection(final AttributeImpl att, Map<String,T> reflectedValues) {
+ final Map<String,Object> map = new HashMap<String,Object>();
+ att.reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ map.put(attClass.getName() + '#' + key, value);
+ }
+ });
+ Assert.assertEquals("Reflection does not produce same map", reflectedValues, map);
+ }
}
Modified: lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java Wed Jan 19 22:41:16 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.t
import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
import com.ibm.icu.lang.UScript;
@@ -77,7 +78,7 @@ public class ScriptAttributeImpl extends
}
@Override
- public String toString() {
- return "script=" + getName();
+ public void reflectWith(AttributeReflector reflector) {
+ reflector.reflect(ScriptAttribute.class, "script", getName());
}
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml Wed Jan 19 22:41:16 2011
@@ -428,13 +428,6 @@
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
- <!--
- Analysis request handler. Since Solr 1.3. Use to returnhow a document is analyzed. Useful
- for debugging and as a token server for other types of applications
- -->
- <requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
-
-
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Wed Jan 19 22:41:16 2011
@@ -20,10 +20,14 @@ package org.apache.solr.handler;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.SorterTemplate;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
@@ -34,6 +38,9 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
+import org.apache.solr.util.ByteUtils;
+
+import org.apache.noggit.CharArr;
import java.io.IOException;
import java.io.StringReader;
@@ -47,7 +54,7 @@ import java.util.*;
*/
public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
- public static final Set<String> EMPTY_STRING_SET = Collections.emptySet();
+ public static final Set<BytesRef> EMPTY_BYTES_SET = Collections.emptySet();
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
rsp.add("analysis", doAnalysis(req));
@@ -107,7 +114,7 @@ public abstract class AnalysisRequestHan
}
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
- List<Token> tokens = analyzeTokenStream(tokenStream);
+ List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
@@ -115,7 +122,7 @@ public abstract class AnalysisRequestHan
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
- List<Token> tokenList = analyzeTokenStream(tokenStream);
+ List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
listBasedTokenStream = new ListBasedTokenStream(tokenList);
}
@@ -126,14 +133,24 @@ public abstract class AnalysisRequestHan
/**
* Analyzes the given text using the given analyzer and returns the produced tokens.
*
- * @param value The value to analyze.
+ * @param query The query to analyze.
* @param analyzer The analyzer to use.
- *
- * @return The produces token list.
*/
- protected List<Token> analyzeValue(String value, Analyzer analyzer) {
- TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value));
- return analyzeTokenStream(tokenStream);
+ protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
+ final Set<BytesRef> tokens = new HashSet<BytesRef>();
+ final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
+ final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+ try {
+ tokenStream.reset();
+ while (tokenStream.incrementToken()) {
+ final BytesRef bytes = new BytesRef();
+ bytesAtt.toBytesRef(bytes);
+ tokens.add(bytes);
+ }
+ } catch (IOException ioe) {
+ throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
+ }
+ return tokens;
}
/**
@@ -143,41 +160,17 @@ public abstract class AnalysisRequestHan
*
* @return List of tokens produced from the TokenStream
*/
- private List<Token> analyzeTokenStream(TokenStream tokenStream) {
- List<Token> tokens = new ArrayList<Token>();
-
- // TODO change this API to support custom attributes
- CharTermAttribute termAtt = null;
- TermToBytesRefAttribute bytesAtt = null;
- if (tokenStream.hasAttribute(CharTermAttribute.class)) {
- termAtt = tokenStream.getAttribute(CharTermAttribute.class);
- } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
- bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- }
- final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class);
- final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
- final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
- final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class);
-
+ private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
+ List<AttributeSource> tokens = new ArrayList<AttributeSource>();
+ // for backwards compatibility, add all "common" attributes
+ tokenStream.addAttribute(PositionIncrementAttribute.class);
+ tokenStream.addAttribute(OffsetAttribute.class);
+ tokenStream.addAttribute(TypeAttribute.class);
final BytesRef bytes = new BytesRef();
try {
+ tokenStream.reset();
while (tokenStream.incrementToken()) {
- Token token = new Token();
- if (termAtt != null) {
- token.setEmpty().append(termAtt);
- }
- if (bytesAtt != null) {
- bytesAtt.toBytesRef(bytes);
- // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
- token.setEmpty().append(bytes.utf8ToString());
- }
- token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
- token.setType(typeAtt.type());
- token.setFlags(flagsAtt.getFlags());
- token.setPayload(payloadAtt.getPayload());
- token.setPositionIncrement(posIncAtt.getPositionIncrement());
- tokens.add((Token) token.clone());
+ tokens.add(tokenStream.cloneAttributes());
}
} catch (IOException ioe) {
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
@@ -186,6 +179,13 @@ public abstract class AnalysisRequestHan
return tokens;
}
+ // a static mapping of the reflected attribute keys to the names used in Solr 1.4
+ static Map<String,String> ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap<String,String>() {{
+ put(OffsetAttribute.class.getName() + "#startOffset", "start");
+ put(OffsetAttribute.class.getName() + "#endOffset", "end");
+ put(TypeAttribute.class.getName() + "#type", "type");
+ }});
+
/**
* Converts the list of Tokens to a list of NamedLists representing the tokens.
*
@@ -194,41 +194,97 @@ public abstract class AnalysisRequestHan
*
* @return List of NamedLists containing the relevant information taken from the tokens
*/
- private List<NamedList> convertTokensToNamedLists(List<Token> tokens, AnalysisContext context) {
- List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
+ private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokens, AnalysisContext context) {
+ final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
- Collections.sort(tokens, new Comparator<Token>() {
- public int compare(Token o1, Token o2) {
- return o1.endOffset() - o2.endOffset();
+ final int[] positions = new int[tokens.size()];
+ int position = 0;
+ for (int i = 0, c = tokens.size(); i < c; i++) {
+ AttributeSource token = tokens.get(i);
+ position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
+ positions[i] = position;
+ }
+
+ // sort the tokens by absolute position
+ new SorterTemplate() {
+ @Override
+ protected void swap(int i, int j) {
+ Collections.swap(tokens, i, j);
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return positions[i] - positions[j];
}
- });
- int position = 0;
+ @Override
+ protected void setPivot(int i) {
+ pivot = positions[i];
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ return pivot - positions[j];
+ }
+
+ private int pivot;
+ }.mergeSort(0, tokens.size() - 1);
FieldType fieldType = context.getFieldType();
- for (Token token : tokens) {
- NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
+ final BytesRef rawBytes = new BytesRef();
+ final CharArr textBuf = new CharArr();
+ for (int i = 0, c = tokens.size(); i < c; i++) {
+ AttributeSource token = tokens.get(i);
+ final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
+ token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
+
+ textBuf.reset();
+ fieldType.indexedToReadable(rawBytes, textBuf);
+ final String text = textBuf.toString();
- String text = fieldType.indexedToReadable(token.toString());
tokenNamedList.add("text", text);
- if (!text.equals(token.toString())) {
- tokenNamedList.add("raw_text", token.toString());
+
+ if (token.hasAttribute(CharTermAttribute.class)) {
+ final String rawText = token.getAttribute(CharTermAttribute.class).toString();
+ if (!rawText.equals(text)) {
+ tokenNamedList.add("raw_text", rawText);
+ }
}
- tokenNamedList.add("type", token.type());
- tokenNamedList.add("start", token.startOffset());
- tokenNamedList.add("end", token.endOffset());
- position += token.getPositionIncrement();
- tokenNamedList.add("position", position);
+ tokenNamedList.add("raw_bytes", rawBytes.toString());
- if (context.getTermsToMatch().contains(token.toString())) {
+ if (context.getTermsToMatch().contains(rawBytes)) {
tokenNamedList.add("match", true);
}
- if (token.getPayload() != null) {
- tokenNamedList.add("payload", token.getPayload());
- }
+ tokenNamedList.add("position", positions[i]);
+
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ // leave out position and bytes term
+ if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (CharTermAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+
+ String k = attClass.getName() + '#' + key;
+
+ // map keys for "standard attributes":
+ if (ATTRIBUTE_MAPPING.containsKey(k)) {
+ k = ATTRIBUTE_MAPPING.get(k);
+ }
+
+ if (value instanceof Payload) {
+ final Payload p = (Payload) value;
+ value = new BytesRef(p.getData()).toString();
+ }
+
+ tokenNamedList.add(k, value);
+ }
+ });
tokensNamedLists.add(tokenNamedList);
}
@@ -261,38 +317,27 @@ public abstract class AnalysisRequestHan
*/
// TODO refactor to support custom attributes
protected final static class ListBasedTokenStream extends TokenStream {
- private final List<Token> tokens;
- private Iterator<Token> tokenIterator;
+ private final List<AttributeSource> tokens;
+ private Iterator<AttributeSource> tokenIterator;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
- private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
- private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Creates a new ListBasedTokenStream which uses the given tokens as its token source.
*
* @param tokens Source of tokens to be used
*/
- ListBasedTokenStream(List<Token> tokens) {
+ ListBasedTokenStream(List<AttributeSource> tokens) {
this.tokens = tokens;
tokenIterator = tokens.iterator();
}
- /**
- * {@inheritDoc}
- */
@Override
public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) {
- Token next = tokenIterator.next();
- termAtt.copyBuffer(next.buffer(), 0, next.length());
- typeAtt.setType(next.type());
- offsetAtt.setOffset(next.startOffset(), next.endOffset());
- flagsAtt.setFlags(next.getFlags());
- payloadAtt.setPayload(next.getPayload());
- posIncAtt.setPositionIncrement(next.getPositionIncrement());
+ AttributeSource next = tokenIterator.next();
+ Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator();
+ while (atts.hasNext()) // make sure all att impls in the token exist here
+ addAttribute(atts.next());
+ next.copyTo(this);
return true;
} else {
return false;
@@ -314,7 +359,7 @@ public abstract class AnalysisRequestHan
private final String fieldName;
private final FieldType fieldType;
private final Analyzer analyzer;
- private final Set<String> termsToMatch;
+ private final Set<BytesRef> termsToMatch;
/**
* Constructs a new AnalysisContext with a given field tpe, analyzer and
@@ -328,7 +373,7 @@ public abstract class AnalysisRequestHan
* @param termsToMatch Holds all the terms that should match during the
* analysis process.
*/
- public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) {
+ public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
this(null, fieldType, analyzer, termsToMatch);
}
@@ -343,7 +388,7 @@ public abstract class AnalysisRequestHan
*
*/
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) {
- this(fieldName, fieldType, analyzer, EMPTY_STRING_SET);
+ this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET);
}
/**
@@ -359,7 +404,7 @@ public abstract class AnalysisRequestHan
* @param termsToMatch Holds all the terms that should match during the
* analysis process.
*/
- public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) {
+ public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
this.fieldName = fieldName;
this.fieldType = fieldType;
this.analyzer = analyzer;
@@ -378,7 +423,7 @@ public abstract class AnalysisRequestHan
return analyzer;
}
- public Set<String> getTermsToMatch() {
+ public Set<BytesRef> getTermsToMatch() {
return termsToMatch;
}
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Wed Jan 19 22:41:16 2011
@@ -19,7 +19,7 @@ package org.apache.solr.handler;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -216,21 +216,20 @@ public class DocumentAnalysisRequestHand
FieldType fieldType = schema.getFieldType(name);
- Set<String> termsToMatch = new HashSet<String>();
- if (request.getQuery() != null && request.isShowMatch()) {
- try {
- List<Token> tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
- for (Token token : tokens) {
- termsToMatch.add(token.toString());
- }
- } catch (Exception e) {
- // ignore analysis exceptions since we are applying arbitrary text to all fields
- }
+ final String queryValue = request.getQuery();
+ Set<BytesRef> termsToMatch;
+ try {
+ termsToMatch = (queryValue != null && request.isShowMatch())
+ ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
+ : EMPTY_BYTES_SET;
+ } catch (Exception e) {
+ // ignore analysis exceptions since we are applying arbitrary text to all fields
+ termsToMatch = EMPTY_BYTES_SET;
}
if (request.getQuery() != null) {
try {
- AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET);
+ AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
} catch (Exception e) {
// ignore analysis exceptions since we are applying arbitrary text to all fields
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java Wed Jan 19 22:41:16 2011
@@ -17,7 +17,7 @@
package org.apache.solr.handler;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
@@ -30,10 +30,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.commons.io.IOUtils;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.io.Reader;
import java.io.IOException;
@@ -222,14 +219,10 @@ public class FieldAnalysisRequestHandler
*/
private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
- Set<String> termsToMatch = new HashSet<String>();
- String queryValue = analysisRequest.getQuery();
- if (queryValue != null && analysisRequest.isShowMatch()) {
- List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
- for (Token token : tokens) {
- termsToMatch.add(token.toString());
- }
- }
+ final String queryValue = analysisRequest.getQuery();
+ final Set<BytesRef> termsToMatch = (queryValue != null && analysisRequest.isShowMatch())
+ ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
+ : EMPTY_BYTES_SET;
NamedList<NamedList> analyzeResults = new SimpleOrderedMap<NamedList>();
if (analysisRequest.getFieldValue() != null) {
Modified: lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp?rev=1061039&r1=1061038&r2=1061039&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp Wed Jan 19 22:41:16 2011
@@ -24,6 +24,7 @@
org.apache.lucene.analysis.CharReader,
org.apache.lucene.analysis.CharStream,
org.apache.lucene.analysis.tokenattributes.*,
+ org.apache.lucene.util.AttributeReflector,
org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain,
@@ -31,7 +32,8 @@
org.apache.solr.schema.FieldType,
org.apache.solr.schema.SchemaField,
org.apache.solr.common.util.XML,
- javax.servlet.jsp.JspWriter,java.io.IOException
+ javax.servlet.jsp.JspWriter,java.io.IOException,
+ org.apache.noggit.CharArr
"%>
<%@ page import="java.io.Reader"%>
<%@ page import="java.io.StringReader"%>
@@ -39,8 +41,6 @@
<%@ page import="java.math.BigInteger" %>
<%-- $Id$ --%>
-<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
-<%-- $Name: $ --%>
<%@include file="header.jsp" %>
@@ -71,19 +71,19 @@
<table>
<tr>
<td>
- <strong>Field
+ <strong>Field
<select name="nt">
- <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
- <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
+ <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
+ <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
</select></strong>
</td>
<td>
- <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
+ <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
</td>
</tr>
<tr>
<td>
- <strong>Field value (Index)</strong>
+ <strong>Field value (Index)</strong>
<br/>
verbose output
<input name="verbose" type="checkbox"
@@ -94,19 +94,19 @@
<%= highlight ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
+ <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
</td>
</tr>
<tr>
<td>
- <strong>Field value (Query)</strong>
+ <strong>Field value (Query)</strong>
<br/>
verbose output
<input name="qverbose" type="checkbox"
<%= qverbose ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
+ <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
</td>
</tr>
<tr>
@@ -115,7 +115,7 @@
</td>
<td>
- <input class="stdbutton" type="submit" value="analyze">
+ <input class="stdbutton" type="submit" value="analyze">
</td>
</tr>
@@ -148,24 +148,28 @@
}
if (field!=null) {
- HashSet<Tok> matches = null;
+ HashSet<BytesRef> matches = null;
if (qval!="" && highlight) {
Reader reader = new StringReader(qval);
Analyzer analyzer = field.getType().getQueryAnalyzer();
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
+ TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
tstream.reset();
- List<AttributeSource> tokens = getTokens(tstream);
- matches = new HashSet<Tok>();
- for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
+ matches = new HashSet<BytesRef>();
+ while (tstream.incrementToken()) {
+ final BytesRef bytes = new BytesRef();
+ bytesAtt.toBytesRef(bytes);
+ matches.add(bytes);
+ }
}
if (val!="") {
out.println("<h3>Index Analyzer</h3>");
- doAnalyzer(out, field, val, false, verbose,matches);
+ doAnalyzer(out, field, val, false, verbose, matches);
}
if (qval!="") {
out.println("<h3>Query Analyzer</h3>");
- doAnalyzer(out, field, qval, true, qverbose,null);
+ doAnalyzer(out, field, qval, true, qverbose, null);
}
}
@@ -177,7 +181,7 @@
<%!
- private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
+ private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<BytesRef> match) throws Exception {
FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ?
@@ -240,7 +244,7 @@
tstream.reset();
List<AttributeSource> tokens = getTokens(tstream);
if (verbose) {
- writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
+ writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
}
writeTokens(out, tokens, ft, verbose, match);
}
@@ -249,52 +253,59 @@
static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
-
- while (true) {
- if (!tstream.incrementToken())
- break;
- else {
- tokens.add(tstream.cloneAttributes());
- }
+ tstream.reset();
+ while (tstream.incrementToken()) {
+ tokens.add(tstream.cloneAttributes());
}
return tokens;
}
-
- private static class Tok {
- AttributeSource token;
- int pos;
- Tok(AttributeSource token, int pos) {
- this.token=token;
- this.pos=pos;
- }
-
- public boolean equals(Object o) {
- return ((Tok)o).token.toString().equals(token.toString());
- }
- public int hashCode() {
- return token.toString().hashCode();
- }
- public String toString() {
- return token.toString();
+ private static class ReflectItem {
+ final Class<? extends Attribute> attClass;
+ final String key;
+ final Object value;
+
+ ReflectItem(Class<? extends Attribute> attClass, String key, Object value) {
+ this.attClass = attClass;
+ this.key = key;
+ this.value = value;
}
- public String toPrintableString() {
- TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
- if (att instanceof CharTermAttribute)
- return att.toString();
- else {
- BytesRef bytes = new BytesRef();
- att.toBytesRef(bytes);
- return bytes.toString();
- }
+ }
+
+ private static class Tok {
+ final BytesRef bytes = new BytesRef();
+ final String rawText, text;
+ final int pos;
+ final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
+
+ Tok(AttributeSource token, int pos, FieldType ft) {
+ this.pos = pos;
+ token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
+ rawText = (token.hasAttribute(CharTermAttribute.class)) ?
+ token.getAttribute(CharTermAttribute.class).toString() : null;
+ final CharArr textBuf = new CharArr(bytes.length);
+ ft.indexedToReadable(bytes, textBuf);
+ text = textBuf.toString();
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ // leave out position and raw term
+ if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (CharTermAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+ reflected.add(new ReflectItem(attClass, key, value));
+ }
+ });
}
}
- private static interface ToStr {
- public String toStr(Object o);
+ private static interface TokToStr {
+ public String toStr(Tok o);
}
- private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
+ private static void printRow(JspWriter out, String header, String headerTitle, List<Tok>[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set<BytesRef> match) throws IOException {
// find the maximum number of terms for any position
int maxSz=1;
if (multival) {
@@ -308,7 +319,13 @@
out.println("<tr>");
if (idx==0 && verbose) {
if (header != null) {
- out.print("<th NOWRAP rowspan=\""+maxSz+"\">");
+ out.print("<th NOWRAP rowspan=\""+maxSz+"\"");
+ if (headerTitle != null) {
+ out.print(" title=\"");
+ XML.escapeCharData(headerTitle,out);
+ out.print("\"");
+ }
+ out.print(">");
XML.escapeCharData(header,out);
out.println("</th>");
}
@@ -317,7 +334,7 @@
for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
List<Tok> lst = arrLst[posIndex];
if (lst.size() <= idx) continue;
- if (match!=null && match.contains(lst.get(idx))) {
+ if (match!=null && match.contains(lst.get(idx).bytes)) {
out.print("<td class=\"highlight\"");
} else {
out.print("<td class=\"debugdata\"");
@@ -340,15 +357,6 @@
}
- static String isPayloadString( Payload p ) {
- String sp = new String( p.getData() );
- for( int i=0; i < sp.length(); i++ ) {
- if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
- return "";
- }
- return "(" + sp + ")";
- }
-
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
out.print("<h4>");
out.print(clazz.getName());
@@ -359,137 +367,93 @@
// readable, raw, pos, type, start/end
- static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
+ static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<BytesRef> match) throws IOException {
// Use a map to tell what tokens are in what positions
// because some tokenizers/filters may do funky stuff with
// very large increments, or negative increments.
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
boolean needRaw=false;
- int pos=0;
+ int pos=0, reflectionCount = -1;
for (AttributeSource t : tokens) {
- if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
- needRaw=true;
- }
-
pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
List lst = map.get(pos);
if (lst==null) {
lst = new ArrayList(1);
map.put(pos,lst);
}
- Tok tok = new Tok(t,pos);
+ Tok tok = new Tok(t,pos,ft);
+ // sanity check
+ if (reflectionCount < 0) {
+ reflectionCount = tok.reflected.size();
+ } else {
+ if (reflectionCount != tok.reflected.size())
+ throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
+ }
+ if (tok.rawText != null && !tok.text.equals(tok.rawText)) {
+ needRaw=true;
+ }
lst.add(tok);
}
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
- /* Jetty 6.1.3 miscompiles this generics version...
- Arrays.sort(arr, new Comparator<List<Tok>>() {
- public int compare(List<Tok> toks, List<Tok> toks1) {
- return toks.get(0).pos - toks1.get(0).pos;
- }
- }
- */
-
+ // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
Arrays.sort(arr, new Comparator() {
public int compare(Object toks, Object toks1) {
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
}
- }
-
-
- );
+ });
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
if (verbose) {
- printRow(out,"term position", arr, new ToStr() {
- public String toStr(Object o) {
- return Integer.toString(((Tok)o).pos);
+ printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return Integer.toString(t.pos);
}
- }
- ,false
- ,verbose
- ,null);
- }
-
-
- printRow(out,"term text", arr, new ToStr() {
- public String toStr(Object o) {
- return ft.indexedToReadable( ((Tok)o).toPrintableString() );
- }
+ },false,verbose,null);
}
- ,true
- ,verbose
- ,match
- );
- if (needRaw) {
- printRow(out,"raw text", arr, new ToStr() {
- public String toStr(Object o) {
- // page is UTF-8, so anything goes.
- return ((Tok)o).toPrintableString();
- }
+ printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return t.text;
}
- ,true
- ,verbose
- ,match
- );
- }
+ },true,verbose,match);
if (verbose) {
- printRow(out,"term type", arr, new ToStr() {
- public String toStr(Object o) {
- String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type();
- if (tt == null) {
- return "null";
- } else {
- return tt;
+ if (needRaw) {
+ printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ // page is UTF-8, so anything goes.
+ return (t.rawText == null) ? "" : t.rawText;
}
- }
+ },true,verbose,match);
}
- ,true
- ,verbose,
- null
- );
- }
-
- if (verbose) {
- printRow(out,"source start,end", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
+
+ printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return t.bytes.toString();
}
- }
- ,true
- ,verbose
- ,null
- );
- }
+ },true,verbose,match);
- if (verbose) {
- printRow(out,"payload", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
- if( null != p ) {
- BigInteger bi = new BigInteger( p.getData() );
- String ret = bi.toString( 16 );
- if (ret.length() % 2 != 0) {
- // Pad with 0
- ret = "0"+ret;
+ for (int att=0; att < reflectionCount; att++) {
+ final ReflectItem item0 = arr[0].get(0).reflected.get(att);
+ final int i = att;
+ printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ final ReflectItem item = t.reflected.get(i);
+ if (item0.attClass != item.attClass || !item0.key.equals(item.key))
+ throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
+ if (item.value instanceof Payload) {
+ final Payload p = (Payload) item.value;
+ return new BytesRef(p.getData()).toString();
+ } else {
+ return (item.value != null) ? item.value.toString() : "";
}
- ret += isPayloadString( p );
- return ret;
}
- return "";
- }
+ },true,verbose, null);
}
- ,true
- ,verbose
- ,null
- );
}
out.println("</table>");