You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2009/07/24 23:45:50 UTC
svn commit: r797665 [3/3] - in /lucene/java/trunk: ./
src/java/org/apache/lucene/analysis/
src/java/org/apache/lucene/analysis/standard/
src/java/org/apache/lucene/analysis/tokenattributes/
src/java/org/apache/lucene/index/ src/java/org/apache/lucene/q...
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java Fri Jul 24 21:45:48 2009
@@ -18,14 +18,17 @@
*/
import java.util.Iterator;
+import java.util.Collections;
import java.util.LinkedHashMap;
+import java.util.IdentityHashMap;
+import java.util.LinkedList;
import java.util.Map;
+import java.util.Map.Entry;
-import org.apache.lucene.analysis.TokenStream;
-
+import org.apache.lucene.analysis.TokenStream; // for javadocs
/**
- * An AttributeSource contains a list of different {@link Attribute}s,
+ * An AttributeSource contains a list of different {@link AttributeImpl}s,
* and methods to add and get them. There can only be a single instance
* of an attribute in the same AttributeSource instance. This is ensured
* by passing in the actual type of the Attribute (Class<Attribute>) to
@@ -40,43 +43,147 @@
*/
public class AttributeSource {
/**
- * An AttributeAcceptor defines only a single method {@link #accept(Class)}.
- * It can be used for e. g. buffering purposes to specify which attributes
- * to buffer.
+ * An AttributeFactory creates instances of {@link AttributeImpl}s.
*/
- public static abstract class AttributeAcceptor {
- /** Return true, to accept this attribute; false otherwise */
- public abstract boolean accept(Class attClass);
+ public static abstract class AttributeFactory {
+ /**
+ * returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
+ */
+ public abstract AttributeImpl createAttributeInstance(Class attClass);
+
+ /**
+ * This is the default factory that creates {@link AttributeImpl}s using the
+ * class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
+ */
+ public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
+
+ private static final class DefaultAttributeFactory extends AttributeFactory {
+ private static final IdentityHashMap/*<Class<? extends Attribute>,Class<? extends AttributeImpl>>*/ attClassImplMap = new IdentityHashMap();
+
+ private DefaultAttributeFactory() {}
+
+ public AttributeImpl createAttributeInstance(Class attClass) {
+ try {
+ return (AttributeImpl) getClassForInterface(attClass).newInstance();
+ } catch (InstantiationException e) {
+ throw new IllegalArgumentException("Could not instantiate class " + attClass);
+ } catch (IllegalAccessException e) {
+ throw new IllegalArgumentException("Could not instantiate class " + attClass);
+ }
+ }
+
+ private static Class getClassForInterface(Class attClass) {
+ synchronized(attClassImplMap) {
+ Class clazz = (Class) attClassImplMap.get(attClass);
+ if (clazz == null) {
+ try {
+ attClassImplMap.put(attClass, clazz = Class.forName(attClass.getName() + "Impl"));
+ } catch (ClassNotFoundException e) {
+ throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
+ }
+ }
+ return clazz;
+ }
+ }
+ }
}
-
- /**
- * Default AttributeAcceptor that accepts all attributes.
- */
- public static final AttributeAcceptor AllAcceptor = new AttributeAcceptor() {
- public boolean accept(Class attClass) {return true;}
- };
+
+ // These two maps must always be in sync!!!
+ // So they are private, final and read-only from the outside (read-only iterators)
+ private final Map/*<Class<Attribute>,AttributeImpl>*/ attributes;
+ private final Map/*<Class<AttributeImpl>,AttributeImpl>*/ attributeImpls;
+ private AttributeFactory factory;
+
/**
- * Holds the Class<Attribute> -> Attribute mapping
+ * An AttributeSource using the default attribute factory {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
*/
- protected Map attributes;
-
public AttributeSource() {
- this.attributes = new LinkedHashMap();
+ this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
}
+ /**
+ * An AttributeSource that uses the same attributes as the supplied one.
+ */
public AttributeSource(AttributeSource input) {
if (input == null) {
throw new IllegalArgumentException("input AttributeSource must not be null");
}
this.attributes = input.attributes;
+ this.attributeImpls = input.attributeImpls;
+ this.factory = input.factory;
+ }
+
+ /**
+ * An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} instances.
+ */
+ public AttributeSource(AttributeFactory factory) {
+ this.attributes = new LinkedHashMap();
+ this.attributeImpls = new LinkedHashMap();
+ this.factory = factory;
+ }
+
+ /**
+ * returns the used AttributeFactory.
+ */
+ public AttributeFactory getAttributeFactory() {
+ return this.factory;
}
- /** Returns an iterator that iterates the attributes
+ /** Returns a new iterator that iterates the attribute classes
* in the same order they were added in.
*/
- public Iterator getAttributesIterator() {
- return attributes.values().iterator();
+ public Iterator/*<Class<? extends Attribute>>*/ getAttributeClassesIterator() {
+ return Collections.unmodifiableSet(attributes.keySet()).iterator();
+ }
+
+ /** Returns a new iterator that iterates all unique Attribute implementations.
+   * This iterator may contain fewer entries than {@link #getAttributeClassesIterator},
+ * if one instance implements more than one Attribute interface.
+ */
+ public Iterator/*<AttributeImpl>*/ getAttributeImplsIterator() {
+ return Collections.unmodifiableCollection(attributeImpls.values()).iterator();
+ }
+
+ /** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
+ private static final IdentityHashMap/*<Class<? extends AttributeImpl>,LinkedList<Class<? extends Attribute>>>*/ knownImplClasses = new IdentityHashMap();
+
+ /** Adds a custom AttributeImpl instance with one or more Attribute interfaces. */
+ public void addAttributeImpl(final AttributeImpl att) {
+ final Class clazz = att.getClass();
+ if (attributeImpls.containsKey(clazz)) return;
+ LinkedList foundInterfaces;
+ synchronized(knownImplClasses) {
+ foundInterfaces = (LinkedList) knownImplClasses.get(clazz);
+ if (foundInterfaces == null) {
+ knownImplClasses.put(clazz, foundInterfaces=new LinkedList());
+ // find all interfaces that this attribute instance implements
+ // and that extend the Attribute interface
+ Class actClazz = clazz;
+ do {
+ Class[] interfaces = actClazz.getInterfaces();
+ for (int i = 0; i < interfaces.length; i++) {
+ final Class curInterface = interfaces[i];
+ if (Attribute.class.isAssignableFrom(curInterface)) {
+ foundInterfaces.add(curInterface);
+ }
+ }
+ actClazz = actClazz.getSuperclass();
+ } while (actClazz != null);
+ }
+ }
+
+ // add all interfaces of this AttributeImpl to the maps
+ for (Iterator it = foundInterfaces.iterator(); it.hasNext(); ) {
+ final Class curInterface = (Class) it.next();
+ // Attribute is a superclass of this interface
+ if (!attributes.containsKey(curInterface)) {
+ // invalidate state to force recomputation in captureState()
+ this.currentState = null;
+ attributes.put(curInterface, att);
+ attributeImpls.put(clazz, att);
+ }
+ }
}
/**
@@ -85,18 +192,11 @@
* already in this AttributeSource and returns it. Otherwise a
* new instance is created, added to this AttributeSource and returned.
*/
- public Attribute addAttribute(Class attClass) {
- Attribute att = (Attribute) attributes.get(attClass);
+ public AttributeImpl addAttribute(Class attClass) {
+ AttributeImpl att = (AttributeImpl) attributes.get(attClass);
if (att == null) {
- try {
- att = (Attribute) attClass.newInstance();
- } catch (InstantiationException e) {
- throw new IllegalArgumentException("Could not instantiate class " + attClass);
- } catch (IllegalAccessException e) {
- throw new IllegalArgumentException("Could not instantiate class " + attClass);
- }
-
- attributes.put(attClass, att);
+ att = this.factory.createAttributeInstance(attClass);
+ addAttributeImpl(att);
}
return att;
}
@@ -121,10 +221,10 @@
* @throws IllegalArgumentException if this AttributeSource does not contain the
* Attribute
*/
- public Attribute getAttribute(Class attClass) {
- Attribute att = (Attribute) this.attributes.get(attClass);
+ public AttributeImpl getAttribute(Class attClass) {
+ AttributeImpl att = (AttributeImpl) this.attributes.get(attClass);
if (att == null) {
- throw new IllegalArgumentException("This token does not have the attribute '" + attClass + "'.");
+ throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass + "'.");
}
return att;
@@ -132,52 +232,72 @@
/**
* Resets all Attributes in this AttributeSource by calling
- * {@link Attribute#clear()} on each Attribute.
+ * {@link AttributeImpl#clear()} on each Attribute implementation.
*/
public void clearAttributes() {
- Iterator it = getAttributesIterator();
+ Iterator it = getAttributeImplsIterator();
while (it.hasNext()) {
- ((Attribute) it.next()).clear();
+ ((AttributeImpl) it.next()).clear();
}
}
/**
- * Captures the current state of the passed in TokenStream.
- * <p>
- * This state will contain all of the passed in TokenStream's
- * {@link Attribute}s. If only a subset of the attributes is needed
- * please use {@link #captureState(AttributeAcceptor)}
- */
- public AttributeSource captureState() {
- return captureState(AllAcceptor);
+ * This class holds the state of an AttributeSource.
+ * @see #captureState
+ * @see #restoreState
+ */
+ public static final class State implements Cloneable {
+ private AttributeImpl attribute;
+ private State next;
+
+ public Object clone() {
+ State clone = new State();
+ clone.attribute = (AttributeImpl) attribute.clone();
+
+ if (next != null) {
+ clone.next = (State) next.clone();
+ }
+
+ return clone;
+ }
}
-
+
+ private State currentState = null;
+
+ private void computeCurrentState() {
+ currentState = new State();
+ State c = currentState;
+ Iterator it = getAttributeImplsIterator();
+ c.attribute = (AttributeImpl) it.next();
+ while (it.hasNext()) {
+ c.next = new State();
+ c = c.next;
+ c.attribute = (AttributeImpl) it.next();
+ }
+ }
+
/**
- * Captures the current state of the passed in TokenStream.
- * <p>
- * This state will contain all of the passed in TokenStream's
- * {@link Attribute}s which the {@link AttributeAcceptor} accepts.
+ * Captures the state of all Attributes. The return value can be passed to
+ * {@link #restoreState} to restore the state of this or another AttributeSource.
*/
- public AttributeSource captureState(AttributeAcceptor acceptor) {
- AttributeSource state = new AttributeSource();
-
- Iterator it = getAttributesIterator();
- while(it.hasNext()) {
- Attribute att = (Attribute) it.next();
- if (acceptor.accept(att.getClass())) {
- Attribute clone = (Attribute) att.clone();
- state.attributes.put(att.getClass(), clone);
- }
+ public State captureState() {
+ if (!hasAttributes()) {
+ return null;
}
-
- return state;
+
+ if (currentState == null) {
+ computeCurrentState();
+ }
+ return (State) this.currentState.clone();
}
/**
- * Restores this state by copying the values of all attributes
- * that this state contains into the attributes of the targetStream.
+ * Restores this state by copying the values of all attribute implementations
+   * that this state contains into the attribute implementations of the targetStream.
* The targetStream must contain a corresponding instance for each argument
- * contained in this state.
+ * contained in this state (e.g. it is not possible to restore the state of
+   * an AttributeSource containing a TermAttribute into an AttributeSource using
+ * a Token instance as implementation).
* <p>
* Note that this method does not affect attributes of the targetStream
* that are not contained in this state. In other words, if for example
@@ -186,19 +306,22 @@
* reset its value to the default, in which case the caller should first
* call {@link TokenStream#clearAttributes()} on the targetStream.
*/
- public void restoreState(AttributeSource target) {
- Iterator it = getAttributesIterator();
- while (it.hasNext()) {
- Attribute att = (Attribute) it.next();
- Attribute targetAtt = target.getAttribute(att.getClass());
- att.copyTo(targetAtt);
- }
+ public void restoreState(State state) {
+ if (state == null) return;
+
+ do {
+ AttributeImpl targetImpl = (AttributeImpl) attributeImpls.get(state.attribute.getClass());
+ if (targetImpl == null)
+ throw new IllegalArgumentException("State contains an AttributeImpl that is not in this AttributeSource");
+ state.attribute.copyTo(targetImpl);
+ state = state.next;
+ } while (state != null);
}
-
+
public int hashCode() {
int code = 0;
if (hasAttributes()) {
- Iterator it = getAttributesIterator();
+ Iterator it = getAttributeImplsIterator();
while (it.hasNext()) {
code = code * 31 + it.next().hashCode();
}
@@ -220,16 +343,17 @@
return false;
}
- if (attributes.size() != other.attributes.size()) {
+ if (this.attributeImpls.size() != other.attributeImpls.size()) {
return false;
}
- Iterator it = getAttributesIterator();
- while (it.hasNext()) {
- Class attName = it.next().getClass();
-
- Attribute otherAtt = (Attribute) other.attributes.get(attName);
- if (otherAtt == null || !otherAtt.equals(attributes.get(attName))) {
+ // it is only equal if all attribute impls are the same in the same order
+ Iterator thisIt = this.getAttributeImplsIterator();
+ Iterator otherIt = other.getAttributeImplsIterator();
+ while (thisIt.hasNext() && otherIt.hasNext()) {
+ AttributeImpl thisAtt = (AttributeImpl) thisIt.next();
+ AttributeImpl otherAtt = (AttributeImpl) otherIt.next();
+ if (otherAtt.getClass() != thisAtt.getClass() || !otherAtt.equals(thisAtt)) {
return false;
}
}
@@ -240,38 +364,48 @@
} else
return false;
}
-
-// TODO: Java 1.5
-// private Map<Class<? extends Attribute>, Attribute> attributes;
-// public <T extends Attribute> T addAttribute(Class<T> attClass) {
-// T att = (T) attributes.get(attClass);
-// if (att == null) {
-// try {
-// att = attClass.newInstance();
-// } catch (InstantiationException e) {
-// throw new IllegalArgumentException("Could not instantiate class " + attClass);
-// } catch (IllegalAccessException e) {
-// throw new IllegalArgumentException("Could not instantiate class " + attClass);
-// }
-//
-// attributes.put(attClass, att);
-// }
-// return att;
-// }
-//
-// public boolean hasAttribute(Class<? extends Attribute> attClass) {
-// return this.attributes.containsKey(attClass);
-// }
-//
-// public <T extends Attribute> T getAttribute(Class<T> attClass) {
-// Attribute att = this.attributes.get(attClass);
-// if (att == null) {
-// throw new IllegalArgumentException("This token does not have the attribute '" + attClass + "'.");
-// }
-//
-// return (T) att;
-// }
-//
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ sb.append('(');
+
+ if (hasAttributes()) {
+ Iterator it = getAttributeImplsIterator();
+ if (it.hasNext()) {
+ sb.append(it.next().toString());
+ }
+ while (it.hasNext()) {
+ sb.append(',');
+ sb.append(it.next().toString());
+ }
+ }
+ sb.append(')');
+ return sb.toString();
+ }
+
+ /**
+ * Performs a clone of all {@link AttributeImpl} instances returned in a new
+ * AttributeSource instance. This method can be used to e.g. create another TokenStream
+ * with exactly the same attributes (using {@link #AttributeSource(AttributeSource)})
+ */
+ public AttributeSource cloneAttributes() {
+ AttributeSource clone = new AttributeSource(this.factory);
+
+ // first clone the impls
+ Iterator/*<AttributeImpl>*/ implIt = getAttributeImplsIterator();
+ while (implIt.hasNext()) {
+ AttributeImpl impl = (AttributeImpl) implIt.next();
+ clone.attributeImpls.put(impl.getClass(), impl.clone());
+ }
+
+ // now the interfaces
+ Iterator/*<Entry<Class<Attribute>, AttributeImpl>>*/ attIt = this.attributes.entrySet().iterator();
+ while (attIt.hasNext()) {
+ Entry/*<Class<Attribute>, AttributeImpl>*/ entry = (Entry/*<Class<Attribute>, AttributeImpl>*/) attIt.next();
+ clone.attributes.put(entry.getKey(), clone.attributeImpls.get(entry.getValue().getClass()));
+ }
+
+ return clone;
+ }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java Fri Jul 24 21:45:48 2009
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.LuceneTestCase;
import java.io.StringReader;
@@ -34,84 +35,84 @@
+" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ ﬁ ﬂ"));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
- final Token reusableToken = new Token();
+ TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
- assertEquals("Des", filter.next(reusableToken).term());
- assertEquals("mot", filter.next(reusableToken).term());
- assertEquals("cles", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("LA", filter.next(reusableToken).term());
- assertEquals("CHAINE", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("A", filter.next(reusableToken).term());
- assertEquals("AE", filter.next(reusableToken).term());
- assertEquals("C", filter.next(reusableToken).term());
- assertEquals("E", filter.next(reusableToken).term());
- assertEquals("E", filter.next(reusableToken).term());
- assertEquals("E", filter.next(reusableToken).term());
- assertEquals("E", filter.next(reusableToken).term());
- assertEquals("I", filter.next(reusableToken).term());
- assertEquals("I", filter.next(reusableToken).term());
- assertEquals("I", filter.next(reusableToken).term());
- assertEquals("I", filter.next(reusableToken).term());
- assertEquals("IJ", filter.next(reusableToken).term());
- assertEquals("D", filter.next(reusableToken).term());
- assertEquals("N", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("O", filter.next(reusableToken).term());
- assertEquals("OE", filter.next(reusableToken).term());
- assertEquals("TH", filter.next(reusableToken).term());
- assertEquals("U", filter.next(reusableToken).term());
- assertEquals("U", filter.next(reusableToken).term());
- assertEquals("U", filter.next(reusableToken).term());
- assertEquals("U", filter.next(reusableToken).term());
- assertEquals("Y", filter.next(reusableToken).term());
- assertEquals("Y", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("a", filter.next(reusableToken).term());
- assertEquals("ae", filter.next(reusableToken).term());
- assertEquals("c", filter.next(reusableToken).term());
- assertEquals("e", filter.next(reusableToken).term());
- assertEquals("e", filter.next(reusableToken).term());
- assertEquals("e", filter.next(reusableToken).term());
- assertEquals("e", filter.next(reusableToken).term());
- assertEquals("i", filter.next(reusableToken).term());
- assertEquals("i", filter.next(reusableToken).term());
- assertEquals("i", filter.next(reusableToken).term());
- assertEquals("i", filter.next(reusableToken).term());
- assertEquals("ij", filter.next(reusableToken).term());
- assertEquals("d", filter.next(reusableToken).term());
- assertEquals("n", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("o", filter.next(reusableToken).term());
- assertEquals("oe", filter.next(reusableToken).term());
- assertEquals("ss", filter.next(reusableToken).term());
- assertEquals("th", filter.next(reusableToken).term());
- assertEquals("u", filter.next(reusableToken).term());
- assertEquals("u", filter.next(reusableToken).term());
- assertEquals("u", filter.next(reusableToken).term());
- assertEquals("u", filter.next(reusableToken).term());
- assertEquals("y", filter.next(reusableToken).term());
- assertEquals("y", filter.next(reusableToken).term());
- assertEquals("fi", filter.next(reusableToken).term());
- assertEquals("fl", filter.next(reusableToken).term());
- assertNull(filter.next(reusableToken));
+ assertTermEquals("Des", filter, termAtt);
+ assertTermEquals("mot", filter, termAtt);
+ assertTermEquals("cles", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("LA", filter, termAtt);
+ assertTermEquals("CHAINE", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("A", filter, termAtt);
+ assertTermEquals("AE", filter, termAtt);
+ assertTermEquals("C", filter, termAtt);
+ assertTermEquals("E", filter, termAtt);
+ assertTermEquals("E", filter, termAtt);
+ assertTermEquals("E", filter, termAtt);
+ assertTermEquals("E", filter, termAtt);
+ assertTermEquals("I", filter, termAtt);
+ assertTermEquals("I", filter, termAtt);
+ assertTermEquals("I", filter, termAtt);
+ assertTermEquals("I", filter, termAtt);
+ assertTermEquals("IJ", filter, termAtt);
+ assertTermEquals("D", filter, termAtt);
+ assertTermEquals("N", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("O", filter, termAtt);
+ assertTermEquals("OE", filter, termAtt);
+ assertTermEquals("TH", filter, termAtt);
+ assertTermEquals("U", filter, termAtt);
+ assertTermEquals("U", filter, termAtt);
+ assertTermEquals("U", filter, termAtt);
+ assertTermEquals("U", filter, termAtt);
+ assertTermEquals("Y", filter, termAtt);
+ assertTermEquals("Y", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("a", filter, termAtt);
+ assertTermEquals("ae", filter, termAtt);
+ assertTermEquals("c", filter, termAtt);
+ assertTermEquals("e", filter, termAtt);
+ assertTermEquals("e", filter, termAtt);
+ assertTermEquals("e", filter, termAtt);
+ assertTermEquals("e", filter, termAtt);
+ assertTermEquals("i", filter, termAtt);
+ assertTermEquals("i", filter, termAtt);
+ assertTermEquals("i", filter, termAtt);
+ assertTermEquals("i", filter, termAtt);
+ assertTermEquals("ij", filter, termAtt);
+ assertTermEquals("d", filter, termAtt);
+ assertTermEquals("n", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("o", filter, termAtt);
+ assertTermEquals("oe", filter, termAtt);
+ assertTermEquals("ss", filter, termAtt);
+ assertTermEquals("th", filter, termAtt);
+ assertTermEquals("u", filter, termAtt);
+ assertTermEquals("u", filter, termAtt);
+ assertTermEquals("u", filter, termAtt);
+ assertTermEquals("u", filter, termAtt);
+ assertTermEquals("y", filter, termAtt);
+ assertTermEquals("y", filter, termAtt);
+ assertTermEquals("fi", filter, termAtt);
+ assertTermEquals("fl", filter, termAtt);
+ assertFalse(filter.incrementToken());
}
@@ -1891,11 +1892,16 @@
TokenStream stream = new WhitespaceTokenizer(new StringReader(inputText.toString()));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
- final Token reusableToken = new Token();
+ TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
Iterator expectedIter = expectedOutputTokens.iterator();
- while (expectedIter.hasNext()) {
- assertEquals(expectedIter.next(), filter.next(reusableToken).term());
+    while (expectedIter.hasNext()) {
+ assertTermEquals((String)expectedIter.next(), filter, termAtt);
}
- assertNull(filter.next(reusableToken));
+ assertFalse(filter.incrementToken());
+ }
+
+ void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
+ assertTrue(stream.incrementToken());
+ assertEquals(expected, termAtt.term());
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java Fri Jul 24 21:45:48 2009
@@ -27,9 +27,8 @@
static final long lvalue = 4573245871874382L;
static final int ivalue = 123456;
- public void testLongStreamNewAPI() throws Exception {
+ public void testLongStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
- stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
@@ -40,22 +39,9 @@
}
assertFalse("No more tokens available", stream.incrementToken());
}
-
- public void testLongStreamOldAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
- stream.setUseNewAPI(false);
- Token tok=new Token();
- for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
- assertNotNull("New token is available", tok=stream.next(tok));
- assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
- assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
- }
- assertNull("No more tokens available", stream.next(tok));
- }
- public void testIntStreamNewAPI() throws Exception {
+ public void testIntStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
- stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
@@ -67,18 +53,6 @@
assertFalse("No more tokens available", stream.incrementToken());
}
- public void testIntStreamOldAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
- stream.setUseNewAPI(false);
- Token tok=new Token();
- for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
- assertNotNull("New token is available", tok=stream.next(tok));
- assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
- assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
- }
- assertNull("No more tokens available", stream.next(tok));
- }
-
public void testNotInitialized() throws Exception {
final NumericTokenStream stream=new NumericTokenStream();
@@ -89,21 +63,12 @@
// pass
}
- stream.setUseNewAPI(true);
try {
stream.incrementToken();
fail("incrementToken() should not succeed.");
} catch (IllegalStateException e) {
// pass
}
-
- stream.setUseNewAPI(false);
- try {
- stream.next(new Token());
- fail("next() should not succeed.");
- } catch (IllegalStateException e) {
- // pass
- }
}
}
Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java?rev=797665&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java Fri Jul 24 21:45:48 2009
@@ -0,0 +1,267 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.English;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests for the TeeSinkTokenFilter
+ */
+public class TestTeeSinkTokenFilter extends LuceneTestCase {
+ protected StringBuffer buffer1;
+ protected StringBuffer buffer2;
+ protected String[] tokens1;
+ protected String[] tokens2;
+
+
+ public TestTeeSinkTokenFilter(String s) {
+ super(s);
+ }
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ tokens1 = new String[]{"The", "quick", "Burgundy", "Fox", "jumped", "over", "the", "lazy", "Red", "Dogs"};
+ tokens2 = new String[]{"The", "Lazy", "Dogs", "should", "stay", "on", "the", "porch"};
+ buffer1 = new StringBuffer();
+
+ for (int i = 0; i < tokens1.length; i++) {
+ buffer1.append(tokens1[i]).append(' ');
+ }
+ buffer2 = new StringBuffer();
+ for (int i = 0; i < tokens2.length; i++) {
+ buffer2.append(tokens2[i]).append(' ');
+ }
+ }
+
+ static final TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() {
+ public boolean accept(AttributeSource a) {
+ TermAttribute termAtt = (TermAttribute) a.getAttribute(TermAttribute.class);
+ return termAtt.term().equalsIgnoreCase("The");
+ }
+ };
+
+ static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() {
+ public boolean accept(AttributeSource a) {
+ TermAttribute termAtt = (TermAttribute) a.getAttribute(TermAttribute.class);
+ return termAtt.term().equalsIgnoreCase("Dogs");
+ }
+ };
+
+
+ public void testGeneral() throws IOException {
+ final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())));
+ final TokenStream sink1 = source.newSinkTokenStream();
+ final TokenStream sink2 = source.newSinkTokenStream(theFilter);
+ int i = 0;
+ TermAttribute termAtt = (TermAttribute) source.getAttribute(TermAttribute.class);
+ while (source.incrementToken()) {
+ assertEquals(tokens1[i], termAtt.term());
+ i++;
+ }
+ assertEquals(tokens1.length, i);
+
+ i = 0;
+ termAtt = (TermAttribute) sink1.getAttribute(TermAttribute.class);
+ while (sink1.incrementToken()) {
+ assertEquals(tokens1[i], termAtt.term());
+ i++;
+ }
+ assertEquals(tokens1.length, i);
+
+ i = 0;
+ termAtt = (TermAttribute) sink2.getAttribute(TermAttribute.class);
+ while (sink2.incrementToken()) {
+ assertTrue(termAtt.term().equalsIgnoreCase("The"));
+ i++;
+ }
+ assertEquals("there should be two times 'the' in the stream", 2, i);
+ }
+
+ public void testMultipleSources() throws Exception {
+ final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())));
+ final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
+ final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
+ final TokenStream source1 = new CachingTokenFilter(tee1);
+
+ final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())));
+ tee2.addSinkTokenStream(dogDetector);
+ tee2.addSinkTokenStream(theDetector);
+ final TokenStream source2 = tee2;
+
+ int i = 0;
+ TermAttribute termAtt = (TermAttribute) source1.getAttribute(TermAttribute.class);
+ while (source1.incrementToken()) {
+ assertEquals(tokens1[i], termAtt.term());
+ i++;
+ }
+ assertEquals(tokens1.length, i);
+ i = 0;
+ termAtt = (TermAttribute) source2.getAttribute(TermAttribute.class);
+ while (source2.incrementToken()) {
+ assertEquals(tokens2[i], termAtt.term());
+ i++;
+ }
+ assertEquals(tokens2.length, i);
+ i = 0;
+ termAtt = (TermAttribute) theDetector.getAttribute(TermAttribute.class);
+ while (theDetector.incrementToken()) {
+ assertTrue("'" + termAtt.term() + "' is not equal to 'The'", termAtt.term().equalsIgnoreCase("The"));
+ i++;
+ }
+ assertEquals("there must be 4 times 'The' in the stream", 4, i);
+ i = 0;
+ termAtt = (TermAttribute) dogDetector.getAttribute(TermAttribute.class);
+ while (dogDetector.incrementToken()) {
+ assertTrue("'" + termAtt.term() + "' is not equal to 'Dogs'", termAtt.term().equalsIgnoreCase("Dogs"));
+ i++;
+ }
+ assertEquals("there must be 2 times 'Dog' in the stream", 2, i);
+
+ source1.reset();
+ TokenStream lowerCasing = new LowerCaseFilter(source1);
+ i = 0;
+ termAtt = (TermAttribute) lowerCasing.getAttribute(TermAttribute.class);
+ while (lowerCasing.incrementToken()) {
+ assertEquals(tokens1[i].toLowerCase(), termAtt.term());
+ i++;
+ }
+ assertEquals(i, tokens1.length);
+ }
+
+ /**
+ * Not an explicit test, just useful to print out some info on performance
+ *
+ * @throws Exception
+ */
+ public void performance() throws Exception {
+ int[] tokCount = {100, 500, 1000, 2000, 5000, 10000};
+ int[] modCounts = {1, 2, 5, 10, 20, 50, 100, 200, 500};
+ for (int k = 0; k < tokCount.length; k++) {
+ StringBuffer buffer = new StringBuffer();
+ System.out.println("-----Tokens: " + tokCount[k] + "-----");
+ for (int i = 0; i < tokCount[k]; i++) {
+ buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
+ }
+ //make sure we produce the same tokens
+ TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
+ TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
+ teeStream.consumeAllTokens();
+ TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), 100);
+ TermAttribute tfTok = (TermAttribute) stream.addAttribute(TermAttribute.class);
+ TermAttribute sinkTok = (TermAttribute) sink.addAttribute(TermAttribute.class);
+ for (int i=0; stream.incrementToken(); i++) {
+ assertTrue(sink.incrementToken());
+ assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);
+ }
+
+ //simulate two fields, each being analyzed once, for 20 documents
+ for (int j = 0; j < modCounts.length; j++) {
+ int tfPos = 0;
+ long start = System.currentTimeMillis();
+ for (int i = 0; i < 20; i++) {
+ stream = new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString())));
+ PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
+ while (stream.incrementToken()) {
+ tfPos += posIncrAtt.getPositionIncrement();
+ }
+ stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), modCounts[j]);
+ posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
+ while (stream.incrementToken()) {
+ tfPos += posIncrAtt.getPositionIncrement();
+ }
+ }
+ long finish = System.currentTimeMillis();
+ System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
+ int sinkPos = 0;
+ //simulate one field with one sink
+ start = System.currentTimeMillis();
+ for (int i = 0; i < 20; i++) {
+ teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
+ sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
+ PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) teeStream.getAttribute(PositionIncrementAttribute.class);
+ while (teeStream.incrementToken()) {
+ sinkPos += posIncrAtt.getPositionIncrement();
+ }
+ //System.out.println("Modulo--------");
+ posIncrAtt = (PositionIncrementAttribute) sink.getAttribute(PositionIncrementAttribute.class);
+ while (sink.incrementToken()) {
+ sinkPos += posIncrAtt.getPositionIncrement();
+ }
+ }
+ finish = System.currentTimeMillis();
+ System.out.println("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
+ assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
+
+ }
+ System.out.println("- End Tokens: " + tokCount[k] + "-----");
+ }
+
+ }
+
+
+ class ModuloTokenFilter extends TokenFilter {
+
+ int modCount;
+
+ ModuloTokenFilter(TokenStream input, int mc) {
+ super(input);
+ modCount = mc;
+ }
+
+ int count = 0;
+
+ //return every 100 tokens
+ public boolean incrementToken() throws IOException {
+ boolean hasNext;
+ for (hasNext = input.incrementToken();
+ hasNext && count % modCount != 0;
+ hasNext = input.incrementToken()) {
+ count++;
+ }
+ count++;
+ return hasNext;
+ }
+ }
+
+ class ModuloSinkFilter implements TeeSinkTokenFilter.SinkFilter {
+ int count = 0;
+ int modCount;
+
+ ModuloSinkFilter(int mc) {
+ modCount = mc;
+ }
+
+ public boolean accept(AttributeSource a) {
+ boolean b = (a != null && count % modCount == 0);
+ count++;
+ return b;
+ }
+
+ }
+}
+
Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java Fri Jul 24 21:45:48 2009
@@ -18,9 +18,6 @@
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
@@ -43,8 +40,7 @@
super(s);
}
- protected void setUp() throws Exception {
- super.setUp();
+ protected void setUp() {
tokens1 = new String[]{"The", "quick", "Burgundy", "Fox", "jumped", "over", "the", "lazy", "Red", "Dogs"};
tokens2 = new String[]{"The", "Lazy", "Dogs", "should", "stay", "on", "the", "porch"};
buffer1 = new StringBuffer();
@@ -66,29 +62,24 @@
public void test() throws IOException {
SinkTokenizer sink1 = new SinkTokenizer(null) {
- public void add(AttributeSource a) throws IOException {
- TermAttribute termAtt = null;
- if (a.hasAttribute(TermAttribute.class)) {
- termAtt = (TermAttribute) a.getAttribute(TermAttribute.class);
- }
- if (termAtt != null && termAtt.term().equalsIgnoreCase("The")) {
- super.add(a);
+ public void add(Token t) {
+ if (t != null && t.term().equalsIgnoreCase("The")) {
+ super.add(t);
}
}
};
TokenStream source = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), sink1);
int i = 0;
- TermAttribute termAtt = (TermAttribute) source.getAttribute(TermAttribute.class);
- while (source.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + tokens1[i], termAtt.term().equals(tokens1[i]) == true);
+ final Token reusableToken = new Token();
+ for (Token nextToken = source.next(reusableToken); nextToken != null; nextToken = source.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + tokens1[i], nextToken.term().equals(tokens1[i]) == true);
i++;
}
assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
assertTrue("sink1 Size: " + sink1.getTokens().size() + " is not: " + 2, sink1.getTokens().size() == 2);
i = 0;
- termAtt = (TermAttribute) sink1.getAttribute(TermAttribute.class);
- while (sink1.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + "The", termAtt.term().equalsIgnoreCase("The") == true);
+ for (Token token = sink1.next(reusableToken); token != null; token = sink1.next(reusableToken)) {
+ assertTrue(token.term() + " is not equal to " + "The", token.term().equalsIgnoreCase("The") == true);
i++;
}
assertTrue(i + " does not equal: " + sink1.getTokens().size(), i == sink1.getTokens().size());
@@ -96,67 +87,55 @@
public void testMultipleSources() throws Exception {
SinkTokenizer theDetector = new SinkTokenizer(null) {
- public void add(AttributeSource a) throws IOException {
- TermAttribute termAtt = null;
- if (a.hasAttribute(TermAttribute.class)) {
- termAtt = (TermAttribute) a.getAttribute(TermAttribute.class);
- }
- if (termAtt != null && termAtt.term().equalsIgnoreCase("The")) {
- super.add(a);
+ public void add(Token t) {
+ if (t != null && t.term().equalsIgnoreCase("The")) {
+ super.add(t);
}
}
};
- SinkTokenizer dogDetector = new SinkTokenizer(null) {
- public void add(AttributeSource a) throws IOException {
- TermAttribute termAtt = null;
- if (a.hasAttribute(TermAttribute.class)) {
- termAtt = (TermAttribute) a.getAttribute(TermAttribute.class);
- }
- if (termAtt != null && termAtt.term().equalsIgnoreCase("Dogs")) {
- super.add(a);
+ SinkTokenizer dogDetector = new SinkTokenizer(null) {
+ public void add(Token t) {
+ if (t != null && t.term().equalsIgnoreCase("Dogs")) {
+ super.add(t);
}
}
};
TokenStream source1 = new CachingTokenFilter(new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), theDetector), dogDetector));
TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())), theDetector), dogDetector);
int i = 0;
- TermAttribute termAtt = (TermAttribute) source1.getAttribute(TermAttribute.class);
- while (source1.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + tokens1[i], termAtt.term().equals(tokens1[i]) == true);
+ final Token reusableToken = new Token();
+ for (Token nextToken = source1.next(reusableToken); nextToken != null; nextToken = source1.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + tokens1[i], nextToken.term().equals(tokens1[i]) == true);
i++;
}
assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 2, theDetector.getTokens().size() == 2);
assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 1, dogDetector.getTokens().size() == 1);
i = 0;
- termAtt = (TermAttribute) source2.getAttribute(TermAttribute.class);
- while (source2.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + tokens2[i], termAtt.term().equals(tokens2[i]) == true);
+ for (Token nextToken = source2.next(reusableToken); nextToken != null; nextToken = source2.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + tokens2[i], nextToken.term().equals(tokens2[i]) == true);
i++;
}
assertTrue(i + " does not equal: " + tokens2.length, i == tokens2.length);
assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 4, theDetector.getTokens().size() == 4);
assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 2, dogDetector.getTokens().size() == 2);
i = 0;
- termAtt = (TermAttribute) theDetector.getAttribute(TermAttribute.class);
- while (theDetector.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + "The", termAtt.term().equalsIgnoreCase("The") == true);
+ for (Token nextToken = theDetector.next(reusableToken); nextToken != null; nextToken = theDetector.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + "The", nextToken.term().equalsIgnoreCase("The") == true);
i++;
}
assertTrue(i + " does not equal: " + theDetector.getTokens().size(), i == theDetector.getTokens().size());
i = 0;
- termAtt = (TermAttribute) dogDetector.getAttribute(TermAttribute.class);
- while (dogDetector.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + "Dogs", termAtt.term().equalsIgnoreCase("Dogs") == true);
+ for (Token nextToken = dogDetector.next(reusableToken); nextToken != null; nextToken = dogDetector.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + "Dogs", nextToken.term().equalsIgnoreCase("Dogs") == true);
i++;
}
assertTrue(i + " does not equal: " + dogDetector.getTokens().size(), i == dogDetector.getTokens().size());
source1.reset();
TokenStream lowerCasing = new LowerCaseFilter(source1);
i = 0;
- termAtt = (TermAttribute) lowerCasing.getAttribute(TermAttribute.class);
- while (lowerCasing.incrementToken()) {
- assertTrue(termAtt.term() + " is not equal to " + tokens1[i].toLowerCase(), termAtt.term().equals(tokens1[i].toLowerCase()) == true);
+ for (Token nextToken = lowerCasing.next(reusableToken); nextToken != null; nextToken = lowerCasing.next(reusableToken)) {
+ assertTrue(nextToken.term() + " is not equal to " + tokens1[i].toLowerCase(), nextToken.term().equals(tokens1[i].toLowerCase()) == true);
i++;
}
assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
@@ -167,7 +146,7 @@
*
* @throws Exception
*/
- public void doTestPerformance() throws Exception {
+ public void performance() throws Exception {
int[] tokCount = {100, 500, 1000, 2000, 5000, 10000};
int[] modCounts = {1, 2, 5, 10, 20, 50, 100, 200, 500};
for (int k = 0; k < tokCount.length; k++) {
@@ -178,20 +157,21 @@
}
//make sure we produce the same tokens
ModuloSinkTokenizer sink = new ModuloSinkTokenizer(tokCount[k], 100);
+ final Token reusableToken = new Token();
TokenStream stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), sink);
- while (stream.incrementToken()) {
+ while (stream.next(reusableToken) != null) {
}
stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), 100);
List tmp = new ArrayList();
- while (stream.incrementToken()) {
- tmp.add(stream.captureState());
+ for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+ tmp.add(nextToken.clone());
}
List sinkList = sink.getTokens();
assertTrue("tmp Size: " + tmp.size() + " is not: " + sinkList.size(), tmp.size() == sinkList.size());
for (int i = 0; i < tmp.size(); i++) {
- AttributeSource tfTok = (AttributeSource) tmp.get(i);
- AttributeSource sinkTok = (AttributeSource) sinkList.get(i);
- assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);
+ Token tfTok = (Token) tmp.get(i);
+ Token sinkTok = (Token) sinkList.get(i);
+ assertTrue(tfTok.term() + " is not equal to " + sinkTok.term() + " at token: " + i, tfTok.term().equals(sinkTok.term()) == true);
}
//simulate two fields, each being analyzed once, for 20 documents
@@ -200,14 +180,12 @@
long start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
stream = new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString())));
- PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
- while (stream.incrementToken()) {
- tfPos += posIncrAtt.getPositionIncrement();
+ for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+ tfPos += nextToken.getPositionIncrement();
}
stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), modCounts[j]);
- posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
- while (stream.incrementToken()) {
- tfPos += posIncrAtt.getPositionIncrement();
+ for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+ tfPos += nextToken.getPositionIncrement();
}
}
long finish = System.currentTimeMillis();
@@ -218,15 +196,13 @@
for (int i = 0; i < 20; i++) {
sink = new ModuloSinkTokenizer(tokCount[k], modCounts[j]);
stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), sink);
- PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
- while (stream.incrementToken()) {
- sinkPos += posIncrAtt.getPositionIncrement();
+ for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+ sinkPos += nextToken.getPositionIncrement();
}
//System.out.println("Modulo--------");
stream = sink;
- posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class);
- while (stream.incrementToken()) {
- sinkPos += posIncrAtt.getPositionIncrement();
+ for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+ sinkPos += nextToken.getPositionIncrement();
}
}
finish = System.currentTimeMillis();
@@ -252,15 +228,15 @@
int count = 0;
//return every 100 tokens
- public boolean incrementToken() throws IOException {
- boolean hasNext;
- for (hasNext = input.incrementToken();
- hasNext && count % modCount != 0;
- hasNext = input.incrementToken()) {
+ public Token next(final Token reusableToken) throws IOException {
+ Token nextToken = null;
+ for (nextToken = input.next(reusableToken);
+ nextToken != null && count % modCount != 0;
+ nextToken = input.next(reusableToken)) {
count++;
}
count++;
- return hasNext;
+ return nextToken;
}
}
@@ -274,9 +250,9 @@
lst = new ArrayList(numToks % mc);
}
- public void add(AttributeSource a) throws IOException {
- if (a != null && count % modCount == 0) {
- super.add(a);
+ public void add(Token t) {
+ if (t != null && count % modCount == 0) {
+ super.add(t);
}
count++;
}
Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java?rev=797665&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java Fri Jul 24 21:45:48 2009
@@ -0,0 +1,311 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.*;
+
+/** This class tests some special cases of backwards compatibility when using the new TokenStream API with old analyzers */
+public class TestTokenStreamBWComp extends LuceneTestCase {
+
+ private final String doc = "This is the new TokenStream api";
+ private final String[] stopwords = new String[] {"is", "the", "this"};
+
+ public static class POSToken extends Token {
+ public static final int PROPERNOUN = 1;
+ public static final int NO_NOUN = 2;
+
+ private int partOfSpeech;
+
+ public void setPartOfSpeech(int pos) {
+ partOfSpeech = pos;
+ }
+
+ public int getPartOfSpeech() {
+ return this.partOfSpeech;
+ }
+ }
+
+ static class PartOfSpeechTaggingFilter extends TokenFilter {
+
+ protected PartOfSpeechTaggingFilter(TokenStream input) {
+ super(input);
+ }
+
+ public Token next() throws IOException {
+ Token t = input.next();
+ if (t == null) return null;
+
+ POSToken pt = new POSToken();
+ pt.reinit(t);
+ if (pt.termLength() > 0) {
+ if (Character.isUpperCase(pt.termBuffer()[0])) {
+ pt.setPartOfSpeech(POSToken.PROPERNOUN);
+ } else {
+ pt.setPartOfSpeech(POSToken.NO_NOUN);
+ }
+ }
+ return pt;
+ }
+
+ }
+
+ static class PartOfSpeechAnnotatingFilter extends TokenFilter {
+ public final static byte PROPER_NOUN_ANNOTATION = 1;
+
+
+ protected PartOfSpeechAnnotatingFilter(TokenStream input) {
+ super(input);
+ }
+
+ public Token next() throws IOException {
+ Token t = input.next();
+ if (t == null) return null;
+
+ if (t instanceof POSToken) {
+ POSToken pt = (POSToken) t;
+ if (pt.getPartOfSpeech() == POSToken.PROPERNOUN) {
+ pt.setPayload(new Payload(new byte[] {PROPER_NOUN_ANNOTATION}));
+ }
+ return pt;
+ } else {
+ return t;
+ }
+ }
+
+ }
+
+ // test the chain: The one and only term "TokenStream" should be declared as proper noun:
+
+ public void testTeeSinkCustomTokenNewAPI() throws IOException {
+ testTeeSinkCustomToken(0);
+ }
+
+ public void testTeeSinkCustomTokenOldAPI() throws IOException {
+ testTeeSinkCustomToken(1);
+ }
+
+ public void testTeeSinkCustomTokenVeryOldAPI() throws IOException {
+ testTeeSinkCustomToken(2);
+ }
+
+ private void testTeeSinkCustomToken(int api) throws IOException {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(doc));
+ stream = new PartOfSpeechTaggingFilter(stream);
+ stream = new LowerCaseFilter(stream);
+ stream = new StopFilter(stream, stopwords);
+
+ SinkTokenizer sink = new SinkTokenizer();
+ TokenStream stream1 = new PartOfSpeechAnnotatingFilter(sink);
+
+ stream = new TeeTokenFilter(stream, sink);
+ stream = new PartOfSpeechAnnotatingFilter(stream);
+
+ switch (api) {
+ case 0:
+ consumeStreamNewAPI(stream);
+ consumeStreamNewAPI(stream1);
+ break;
+ case 1:
+ consumeStreamOldAPI(stream);
+ consumeStreamOldAPI(stream1);
+ break;
+ case 2:
+ consumeStreamVeryOldAPI(stream);
+ consumeStreamVeryOldAPI(stream1);
+ break;
+ }
+ }
+
+ // test caching the special custom POSToken works in all cases
+
+ public void testCachingCustomTokenNewAPI() throws IOException {
+ testCachingCustomToken(0);
+ }
+
+ public void testCachingCustomTokenOldAPI() throws IOException {
+ testCachingCustomToken(1);
+ }
+
+ public void testCachingCustomTokenVeryOldAPI() throws IOException {
+ testCachingCustomToken(2);
+ }
+
+ public void testCachingCustomTokenMixed() throws IOException {
+ testCachingCustomToken(3);
+ }
+
+ private void testCachingCustomToken(int api) throws IOException {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(doc));
+ stream = new PartOfSpeechTaggingFilter(stream);
+ stream = new LowerCaseFilter(stream);
+ stream = new StopFilter(stream, stopwords);
+ stream = new CachingTokenFilter(stream); // <- the caching is done before the annotating!
+ stream = new PartOfSpeechAnnotatingFilter(stream);
+
+ switch (api) {
+ case 0:
+ consumeStreamNewAPI(stream);
+ consumeStreamNewAPI(stream);
+ break;
+ case 1:
+ consumeStreamOldAPI(stream);
+ consumeStreamOldAPI(stream);
+ break;
+ case 2:
+ consumeStreamVeryOldAPI(stream);
+ consumeStreamVeryOldAPI(stream);
+ break;
+ case 3:
+ consumeStreamNewAPI(stream);
+ consumeStreamOldAPI(stream);
+ consumeStreamVeryOldAPI(stream);
+ consumeStreamNewAPI(stream);
+ consumeStreamVeryOldAPI(stream);
+ break;
+ }
+ }
+
+ private static void consumeStreamNewAPI(TokenStream stream) throws IOException {
+ stream.reset();
+ PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class);
+ TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
+
+ while (stream.incrementToken()) {
+ String term = termAtt.term();
+ Payload p = payloadAtt.getPayload();
+ if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
+ assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+ } else {
+ assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
+ }
+ }
+ }
+
+ private static void consumeStreamOldAPI(TokenStream stream) throws IOException {
+ stream.reset();
+ Token reusableToken = new Token();
+
+ while ((reusableToken = stream.next(reusableToken)) != null) {
+ String term = reusableToken.term();
+ Payload p = reusableToken.getPayload();
+ if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
+ assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+ } else {
+ assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
+ }
+ }
+ }
+
+ private static void consumeStreamVeryOldAPI(TokenStream stream) throws IOException {
+ stream.reset();
+
+ Token token;
+ while ((token = stream.next()) != null) {
+ String term = token.term();
+ Payload p = token.getPayload();
+ if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
+ assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+ } else {
+ assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
+ }
+ }
+ }
+
+ // test if tokenization fails, if only the new API is allowed and an old TokenStream is in the chain
+ public void testOnlyNewAPI() throws IOException {
+ TokenStream.setOnlyUseNewAPI(true);
+ try {
+
+ // this should fail with UOE
+ try {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(doc));
+ stream = new PartOfSpeechTaggingFilter(stream); // <-- this one is evil!
+ stream = new LowerCaseFilter(stream);
+ stream = new StopFilter(stream, stopwords);
+ while (stream.incrementToken());
+ fail("If only the new API is allowed, this should fail with an UOE");
+ } catch (UnsupportedOperationException uoe) {
+ assertTrue((PartOfSpeechTaggingFilter.class.getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.").equals(uoe.getMessage()));
+ }
+
+ // this should pass, as all core token streams support the new API
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(doc));
+ stream = new LowerCaseFilter(stream);
+ stream = new StopFilter(stream, stopwords);
+ while (stream.incrementToken());
+
+ // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
+ assertTrue("TermAttribute is implemented by TermAttributeImpl",
+ stream.addAttribute(TermAttribute.class) instanceof TermAttributeImpl);
+ assertTrue("OffsetAttribute is implemented by OffsetAttributeImpl",
+ stream.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl);
+ assertTrue("FlagsAttribute is implemented by FlagsAttributeImpl",
+ stream.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
+ assertTrue("PayloadAttribute is implemented by PayloadAttributeImpl",
+ stream.addAttribute(PayloadAttribute.class) instanceof PayloadAttributeImpl);
+ assertTrue("PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl",
+ stream.addAttribute(PositionIncrementAttribute.class) instanceof PositionIncrementAttributeImpl);
+ assertTrue("TypeAttribute is implemented by TypeAttributeImpl",
+ stream.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl);
+
+ // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
+ // as attribute instance.
+ // TokenWrapper encapsulates a Token instance that can be exchanged
+ // by another Token instance without changing the AttributeImpl instance
+ // itself.
+ TokenStream.setOnlyUseNewAPI(false);
+ stream = new WhitespaceTokenizer(new StringReader(doc));
+ assertTrue("TermAttribute is implemented by TokenWrapper",
+ stream.addAttribute(TermAttribute.class) instanceof TokenWrapper);
+ assertTrue("OffsetAttribute is implemented by TokenWrapper",
+ stream.addAttribute(OffsetAttribute.class) instanceof TokenWrapper);
+ assertTrue("FlagsAttribute is implemented by TokenWrapper",
+ stream.addAttribute(FlagsAttribute.class) instanceof TokenWrapper);
+ assertTrue("PayloadAttribute is implemented by TokenWrapper",
+ stream.addAttribute(PayloadAttribute.class) instanceof TokenWrapper);
+ assertTrue("PositionIncrementAttribute is implemented by TokenWrapper",
+ stream.addAttribute(PositionIncrementAttribute.class) instanceof TokenWrapper);
+ assertTrue("TypeAttribute is implemented by TokenWrapper",
+ stream.addAttribute(TypeAttribute.class) instanceof TokenWrapper);
+
+ } finally {
+ TokenStream.setOnlyUseNewAPI(false);
+ }
+ }
+
+ public void testOverridesAny() throws Exception {
+ try {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(doc));
+ stream = new TokenFilter(stream) {
+ // we implement nothing, only un-abstract it
+ };
+ stream = new LowerCaseFilter(stream);
+ stream = new StopFilter(stream, stopwords);
+ while (stream.incrementToken());
+ fail("One TokenFilter does not override any of the required methods, so it should fail.");
+ } catch (UnsupportedOperationException uoe) {
+ assertTrue(uoe.getMessage().endsWith("does not implement any of incrementToken(), next(Token), next()."));
+ }
+ }
+
+}
Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Fri Jul 24 21:45:48 2009
@@ -141,11 +141,11 @@
public TokenStream tokenStream(String fieldName, Reader reader) {
return new TokenFilter(new WhitespaceTokenizer(reader)) {
boolean first=true;
- AttributeSource state;
+ AttributeSource.State state;
public boolean incrementToken() throws IOException {
if (state != null) {
- state.restoreState(this);
+ restoreState(state);
payloadAtt.setPayload(null);
posIncrAtt.setPositionIncrement(0);
termAtt.setTermBuffer(new char[]{'b'}, 0, 1);
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Jul 24 21:45:48 2009
@@ -28,6 +28,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
@@ -3521,47 +3522,21 @@
}
}
- private static class MyAnalyzer extends Analyzer {
-
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream s = new WhitespaceTokenizer(reader);
- s.addAttribute(PositionIncrementAttribute.class);
- return s;
- }
-
- }
-
// LUCENE-1255
public void testNegativePositions() throws Throwable {
SinkTokenizer tokens = new SinkTokenizer();
- tokens.addAttribute(TermAttribute.class);
- tokens.addAttribute(PositionIncrementAttribute.class);
-
- AttributeSource state = new AttributeSource();
- TermAttribute termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
- PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
- termAtt.setTermBuffer("a");
- posIncrAtt.setPositionIncrement(0);
- tokens.add(state);
-
- state = new AttributeSource();
- termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
- posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
-
- termAtt.setTermBuffer("b");
- posIncrAtt.setPositionIncrement(1);
- tokens.add(state);
-
- state = new AttributeSource();
- termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
- posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
-
- termAtt.setTermBuffer("c");
- posIncrAtt.setPositionIncrement(1);
- tokens.add(state);
+ Token t = new Token();
+ t.setTermBuffer("a");
+ t.setPositionIncrement(0);
+ tokens.add(t);
+ t.setTermBuffer("b");
+ t.setPositionIncrement(1);
+ tokens.add(t);
+ t.setTermBuffer("c");
+ tokens.add(t);
MockRAMDirectory dir = new MockRAMDirectory();
- IndexWriter w = new IndexWriter(dir, new MyAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+ IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
doc.add(new Field("field", tokens));
w.addDocument(doc);
Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java Fri Jul 24 21:45:48 2009
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
@@ -317,8 +318,8 @@
}
private static class EmptyTokenStream extends TokenStream {
- public Token next(final Token reusableToken) {
- return null;
+ public boolean incrementToken() throws IOException {
+ return false;
}
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=797665&r1=797664&r2=797665&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/LuceneTestCase.java Fri Jul 24 21:45:48 2009
@@ -44,7 +44,6 @@
protected void setUp() throws Exception {
ConcurrentMergeScheduler.setTestMode();
- TokenStream.setUseNewAPIDefault(true);
}
protected void tearDown() throws Exception {
Added: lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=797665&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java Fri Jul 24 21:45:48 2009
@@ -0,0 +1,122 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.*;
+
+import java.util.Iterator;
+
+public class TestAttributeSource extends LuceneTestCase {
+
+ public void testCaptureState() {
+ // init a first instance
+ AttributeSource src = new AttributeSource();
+ TermAttribute termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
+ TypeAttribute typeAtt = (TypeAttribute) src.addAttribute(TypeAttribute.class);
+ termAtt.setTermBuffer("TestTerm");
+ typeAtt.setType("TestType");
+ final int hashCode = src.hashCode();
+
+ AttributeSource.State state = src.captureState();
+
+ // modify the attributes
+ termAtt.setTermBuffer("AnotherTestTerm");
+ typeAtt.setType("AnotherTestType");
+ assertTrue("Hash code should be different", hashCode != src.hashCode());
+
+ src.restoreState(state);
+ assertEquals("TestTerm", termAtt.term());
+ assertEquals("TestType", typeAtt.type());
+ assertEquals("Hash code should be equal after restore", hashCode, src.hashCode());
+
+ // restore into an exact configured copy
+ AttributeSource copy = new AttributeSource();
+ copy.addAttribute(TermAttribute.class);
+ copy.addAttribute(TypeAttribute.class);
+ copy.restoreState(state);
+ assertEquals("Both AttributeSources should have same hashCode after restore", src.hashCode(), copy.hashCode());
+ assertEquals("Both AttributeSources should be equal after restore", src, copy);
+
+ // init a second instance (with attributes in different order and one additional attribute)
+ AttributeSource src2 = new AttributeSource();
+ typeAtt = (TypeAttribute) src2.addAttribute(TypeAttribute.class);
+ FlagsAttribute flagsAtt = (FlagsAttribute) src2.addAttribute(FlagsAttribute.class);
+ termAtt = (TermAttribute) src2.addAttribute(TermAttribute.class);
+ flagsAtt.setFlags(12345);
+
+ src2.restoreState(state);
+ assertEquals("TestTerm", termAtt.term());
+ assertEquals("TestType", typeAtt.type());
+ assertEquals("FlagsAttribute should not be touched", 12345, flagsAtt.getFlags());
+
+ // init a third instance missing one Attribute
+ AttributeSource src3 = new AttributeSource();
+ termAtt = (TermAttribute) src3.addAttribute(TermAttribute.class);
+ try {
+ src3.restoreState(state);
+ fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
+ } catch (IllegalArgumentException iae) {
+ // pass
+ }
+ }
+
+ public void testCloneAttributes() {
+ final AttributeSource src = new AttributeSource();
+ final TermAttribute termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
+ final TypeAttribute typeAtt = (TypeAttribute) src.addAttribute(TypeAttribute.class);
+ termAtt.setTermBuffer("TestTerm");
+ typeAtt.setType("TestType");
+
+ final AttributeSource clone = src.cloneAttributes();
+ final Iterator it = clone.getAttributeClassesIterator();
+ assertEquals("TermAttribute must be the first attribute", TermAttribute.class, it.next());
+ assertEquals("TypeAttribute must be the second attribute", TypeAttribute.class, it.next());
+ assertFalse("No more attributes", it.hasNext());
+
+ final TermAttribute termAtt2 = (TermAttribute) clone.getAttribute(TermAttribute.class);
+ final TypeAttribute typeAtt2 = (TypeAttribute) clone.getAttribute(TypeAttribute.class);
+ assertNotSame("TermAttribute of original and clone must be different instances", termAtt2, termAtt);
+ assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt);
+ assertEquals("TermAttribute of original and clone must be equal", termAtt2, termAtt);
+ assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
+ }
+
+ public void testToStringAndMultiAttributeImplementations() {
+ AttributeSource src = new AttributeSource();
+ TermAttribute termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
+ TypeAttribute typeAtt = (TypeAttribute) src.addAttribute(TypeAttribute.class);
+ termAtt.setTermBuffer("TestTerm");
+ typeAtt.setType("TestType");
+ assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
+
+ src = new AttributeSource();
+ src.addAttributeImpl(new Token());
+ // this should not add a new attribute as Token implements TermAttribute, too
+ termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
+ assertTrue("TermAttribute should be implemented by Token", termAtt instanceof Token);
+ // get the Token attribute and check that it is the only one
+ final Iterator it = src.getAttributeImplsIterator();
+ Token tok = (Token) it.next();
+ assertFalse("There should be only one attribute implementation instance", it.hasNext());
+
+ termAtt.setTermBuffer("TestTerm");
+ assertEquals("Token should only be printed once", "("+tok.toString()+")", src.toString());
+ }
+
+}
Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
svn:eol-style = native