You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2009/07/24 10:58:55 UTC

svn commit: r797373 [4/8] - in /incubator/uima/sandbox/trunk/Lucas: ./ docbook/ docbook/LuceneCASConsumerUserGuide/ docbook/LuceneCASConsumerUserGuide/images/ src/main/java/org/apache/uima/lucas/consumer/ src/main/java/org/apache/uima/lucas/indexer/ sr...

Modified: incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml Fri Jul 24 08:58:52 2009
@@ -21,66 +21,20 @@
 
 <casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <implementationName>de.julielab.jules.consumer.LuceneCASIndexer</implementationName>
+  <implementationName>org.apache.uima.consumer.LuceneCASIndexer</implementationName>
   <processingResourceMetaData>
-    <name>LuceneCASIndexerDescriptor</name>
+    <name>LuceneCASIndexer</name>
     <description/>
-    <version>2.0</version>
-    <vendor>julielab</vendor>
+    <version>0.6</version>
+    <vendor>apache</vendor>
     <configurationParameters>
       <configurationParameter>
-        <name>indexOutDir</name>
-        <description>defines the output directory where the index should be written</description>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
         <name>mappingFile</name>
         <description>path to the mapping file</description>
         <type>String</type>
         <multiValued>false</multiValued>
         <mandatory>true</mandatory>
       </configurationParameter>
-      <configurationParameter>
-        <name>stopwordFile</name>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>hypernymFile</name>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>tokenMappingFile</name>
-        <description>Mapping file for replacement of  tokens.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>uniqueIndex</name>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>ramBufferSize</name>
-        <description>Sets the ram buffer size of the index writer. See lucene docs for further information.</description>
-        <type>Integer</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>compoundFileFormat</name>
-        <description>Determines wether the index writer should use compound file format or not.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
     </configurationParameters>
     <configurationParameterSettings>
       <nameValuePair>
@@ -89,50 +43,6 @@
           <string>src/test/resources/lucas.xml</string>
         </value>
       </nameValuePair>
-      <nameValuePair>
-        <name>indexOutDir</name>
-        <value>
-          <string>src/test/resources/testIndex</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>stopwordFile</name>
-        <value>
-          <string>src/test/resources/stopwords.txt</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>hypernymFile</name>
-        <value>
-          <string>src/test/resources/hypernyms.txt</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>tokenMappingFile</name>
-        <value>
-          <array>
-            <string>src/test/resources/tokenMapping.txt</string>
-          </array>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>uniqueIndex</name>
-        <value>
-          <boolean>true</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>ramBufferSize</name>
-        <value>
-          <integer>512</integer>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>compoundFileFormat</name>
-        <value>
-          <boolean>true</boolean>
-        </value>
-      </nameValuePair>
     </configurationParameterSettings>
     <typeSystemDescription/>
     <typePriorities/>
@@ -146,9 +56,26 @@
     </capabilities>
     <operationalProperties>
       <modifiesCas>false</modifiesCas>
-      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
       <outputsNewCASes>false</outputsNewCASes>
     </operationalProperties>
   </processingResourceMetaData>
-  <resourceManagerConfiguration/>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>LucasIndexWriterProvider</name>
+        <description>Provides an index writer instance.</description>
+        <fileResourceSpecifier>
+          <fileUrl>file:src/test/resources/IndexWriter.properties</fileUrl>
+        </fileResourceSpecifier>
+        <implementationName>org.apache.uima.indexer.IndexWriterProviderImpl</implementationName>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>indexWriterProvider</key>
+        <resourceName>LucasIndexWriterProvider</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
 </casConsumerDescription>

Modified: incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd Fri Jul 24 08:58:52 2009
@@ -62,44 +62,68 @@
     		<xs:enumeration value="first"/>
     		<xs:enumeration value="last"/>
   		</xs:restriction>
-	</xs:simpleType>	
+	</xs:simpleType>
+
+	<xs:element name="filters">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="filter" minOccurs="1" maxOccurs="unbounded"/>
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+	
+	<xs:element name="filter">
+		<xs:complexType>
+			<xs:attribute name="className" type="xs:string"/>
+			<xs:attribute name="factoryClassName" type="xs:string"/>
+			<xs:attribute name="reuseFactory" type="xs:boolean"/>
+			<xs:attribute name="name" type="xs:string"/>
+			<xs:anyAttribute processContents="lax"/>
+		</xs:complexType>				
+	</xs:element>
+		
+	<xs:element name="features">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="feature" minOccurs="1" maxOccurs="unbounded"/>
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
 	
 	<xs:element name="feature">
 		<xs:complexType>
 			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
 			<xs:attribute name="numberFormat" type="xs:string"/>
 		</xs:complexType>
 	</xs:element>
+
+	<xs:element name="annotations">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded"/>
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
 	
 	<xs:element name="annotation">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
+				<xs:element ref="features" minOccurs="1" maxOccurs="1"/>
 			</xs:sequence>				
 			<xs:attribute name="type" type="xs:string"/>
 			<xs:attribute name="sofa" type="xs:string"/>
 			<xs:attribute name="featurePath" type="xs:string"/>
-			<xs:attribute name="concatString" type="xs:string"/>
-			<xs:attribute name="splitString" type="xs:string"/>
-			<xs:attribute name="prefix" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
-			<xs:attribute name="stopwordRemove" type="xs:boolean"/>
-			<xs:attribute name="position" type="positionType"/>
-			<xs:attribute name="addHypernyms" type="xs:boolean"/>
-			<xs:attribute name="mappingFile" type="xs:string"/>
-			<xs:attribute name="snowballFilter" type="xs:string"/>
-			<xs:attribute name="unique" type="xs:boolean"/>
-			<xs:attribute name="tokenizer" type="tokenizerType"/>			
+			<xs:attribute name="tokenizer" type="tokenizerType"/>	
+			<xs:attribute name="featureValueDelimiterString" type="xs:string"/>		
 		</xs:complexType>
 	</xs:element>
 
 	<xs:element name="field">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="annotation" maxOccurs="unbounded"/>
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
+				<xs:element ref="annotations" minOccurs="1" maxOccurs="1"/>
 			</xs:sequence>				
 			<xs:attribute name="name" type="xs:string"/>
 			<xs:attribute name="index" type="indexType"/>

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java Fri Jul 24 08:58:52 2009
@@ -19,146 +19,106 @@
 
 package org.apache.uima.lucas.consumer;
 
-import static org.junit.Assert.assertArrayEquals;
+import static org.easymock.EasyMock.capture;
+import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.File;
-import java.lang.management.ManagementFactory;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
+import java.io.IOException;
 import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import java.util.Properties;
 
 import org.apache.lucene.store.FSDirectory;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.collection.CasConsumerDescription;
-import org.apache.uima.lucas.consumer.LuceneCASIndexer;
-import org.apache.uima.lucas.indexer.FieldDescription;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+import org.apache.uima.lucas.indexer.mapping.FieldDescription;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
+import org.easymock.Capture;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-public class LuceneCASIndexerTest {
-
-  private static final String TOKEN_MAPPING_TXT = "tokenMapping.txt";
-
-  private static final String HYPERNYM_ID1 = "id1";
+import com.google.common.collect.Maps;
 
-  private static final Object HYPERNYM_ID2 = "id2";
-
-  private static final String[] STOP_WORDS = new String[] { "na", "und", "nu" };
+public class LuceneCASIndexerTest {
 
+  private static final String TEST_FILTER_ANNOTATION = "testFilterAnnotation";
+  private static final String TEST_FILTER_FIELD = "testFilterField";
   private static final String FIELD_NAME = "annotation1";
+	private static final String DESCRIPTOR_FILE = "src/main/resources/LuceneCASIndexer.xml";
+	private static final String INDEX_DIRECTORY = "src/test/resources/test-index";
 
-  private static final String WRITE_LOCK = "write.lock";
-
-  private static final String DESCRIPTOR_FILE = "src/main/resources/LuceneCASIndexer.xml";
-
-  private static final String INDEX_DIRECTORY = "src/test/resources/testIndex";
-
-  private LuceneCASIndexer consumer;
-
-  @Before
-  public void setUp() throws Exception {
-    CasConsumerDescription consumerDescription =
-            (CasConsumerDescription) UIMAFramework.getXMLParser().parseCasConsumerDescription(
-                    new XMLInputSource(DESCRIPTOR_FILE));
-    consumer = (LuceneCASIndexer) UIMAFramework.produceCasConsumer(consumerDescription);
-  }
-
-  @After
-  public void tearDown() throws Exception {
-
-    FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
-    File directoryFile = directory.getFile();
-    consumer.destroy();
-
-    directory = FSDirectory.getDirectory(directoryFile);
-
-    // directory.deleteFile(WRITE_LOCK);
-    for (String file : directory.list())
-      directory.deleteFile(file);
-
-    directory.getFile().delete();
-  }
-
-  @Test
-  public void testIndexOutDir() {
-    FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
-
-    String path = directory.getFile().getPath();
-    assertTrue(path.contains(INDEX_DIRECTORY));
-  }
-
-  @Test
-  public void testMappingFile() {
-    Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
-    assertEquals(1, fieldDescriptions.size());
-    FieldDescription fieldDescription = fieldDescriptions.iterator().next();
-    assertEquals(FIELD_NAME, fieldDescription.getName());
-    assertEquals(2, fieldDescription.getAnnotationDescriptions().size());
-  }
-
-  @Test
-  public void testStopwordFile() {
-    String[] stopwords = consumer.getFilterBuilder().getStopwords();
-    assertArrayEquals(STOP_WORDS, stopwords);
-  }
-
-  @Test
-  public void testHypernymFile() {
-    Map<String, List<String>> hypernyms = consumer.getFilterBuilder().getHypernyms();
-    assertEquals(2, hypernyms.size());
-    assertTrue(hypernyms.containsKey(HYPERNYM_ID1));
-    assertTrue(hypernyms.containsKey(HYPERNYM_ID2));
-  }
-
-  @Test
-  public void testTokenMappingFile() {
-    Map<String, Map<String, String>> tokenMappings = consumer.getFilterBuilder().getMappings();
-    assertTrue(tokenMappings.containsKey(TOKEN_MAPPING_TXT));
-    assertEquals(2, tokenMappings.get(TOKEN_MAPPING_TXT).size());
-  }
-
-  @Test
-  public void testUniqueIndex() {
-    String hostname = getHostName();
-    String pid = getPID();
-
-    FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
-
-    String path = directory.getFile().getPath();
-    assertTrue(path.endsWith(INDEX_DIRECTORY + "-" + hostname + "-" + pid));
-  }
-
-  @Test
-  public void testRamBufferSize() {
-    assertEquals(512, consumer.getIndexWriter().getRAMBufferSizeMB(), 0);
-  }
-
-  @Test
-  public void testCompoundFileFormat() {
-    assertTrue(consumer.getIndexWriter().getUseCompoundFile());
-  }
-
-  protected String getPID() {
-    String id = ManagementFactory.getRuntimeMXBean().getName();
-    return id.substring(0, id.indexOf("@"));
-  }
-
-  public String getHostName() {
-    InetAddress address;
-    String hostName;
-    try {
-      address = InetAddress.getLocalHost();
-      hostName = address.getHostName();
-    } catch (UnknownHostException e) {
-      throw new IllegalStateException(e);
-    }
+	private LuceneCASIndexer consumer;
 
-    return hostName;
-  }
+	@Before
+	public void setUp() throws InvalidXMLException, IOException, ResourceInitializationException{
+		
+		  CasConsumerDescription consumerDescription = (CasConsumerDescription) UIMAFramework.getXMLParser().parseCasConsumerDescription(new XMLInputSource(DESCRIPTOR_FILE));
+		  consumer = (LuceneCASIndexer) UIMAFramework.produceCasConsumer(consumerDescription);
+	}
+	
+	@After
+	public void tearDown() throws Exception{
+		FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
+		File directoryFile = directory.getFile();
+		consumer.destroy();
+		
+		directory = FSDirectory.getDirectory(directoryFile);
+		
+		for( String file: directory.list() )
+			directory.deleteFile(file);
+
+		directory.getFile().delete();
+	}
+	
+	@Test
+	public void testIndexOutDir(){
+		FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
+		
+		String path = directory.getFile().getPath();
+		assertTrue(path.contains(INDEX_DIRECTORY));
+	}
+	
+	@Test
+	public void testMappingFile(){
+		Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
+		assertEquals(1, fieldDescriptions.size());
+		FieldDescription fieldDescription = fieldDescriptions.iterator().next();
+		assertEquals(FIELD_NAME, fieldDescription.getName());
+		assertEquals(2, fieldDescription.getAnnotationDescriptions().size());
+	}
+	
+	@Test
+	public void testPreloadResources() throws IOException{
+	  Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
+	  TokenFilterFactory testFactoryField = createMock(TokenFilterFactory.class);
+    TokenFilterFactory testFactoryAnnotation = createMock(TokenFilterFactory.class);
+    
+	  Capture<Properties> propertiesCaptureField = new Capture<Properties>();
+	  Capture<Properties> propertiesCaptureAnnotation = new Capture<Properties>();
+	  
+	  testFactoryField.preloadResources(capture(propertiesCaptureField));
+	  testFactoryAnnotation.preloadResources(capture(propertiesCaptureAnnotation));
+	  
+	  replay(testFactoryField);
+	  replay(testFactoryAnnotation);
+	  
+	  consumer.preloadResources(fieldDescriptions, Maps.immutableBiMap(TEST_FILTER_ANNOTATION, testFactoryAnnotation, 
+	                                                                   TEST_FILTER_FIELD, testFactoryField));	  
+	  verify(testFactoryField);
+	  verify(testFactoryAnnotation);
+	  
+	  Properties fieldFilterProperties = propertiesCaptureField.getValue();
+	  assertEquals("value1", fieldFilterProperties.getProperty("key1"));
+	  
+    Properties annotationFilterProperties = propertiesCaptureAnnotation.getValue();
+    assertEquals("value2", annotationFilterProperties.getProperty("key2"));
+	}
+	
 }

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,8 +19,7 @@
 
 package org.apache.uima.lucas.indexer;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
 import java.text.DecimalFormat;
 
@@ -29,111 +28,101 @@
 import org.apache.uima.cas.Type;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.collection.CollectionReaderDescription;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
 import org.apache.uima.lucas.indexer.AnnotationTokenStreamBuilder;
-import org.apache.uima.lucas.indexer.FeatureDescription;
 import org.apache.uima.lucas.indexer.analysis.AnnotationTokenStream;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
+import org.apache.uima.lucas.indexer.mapping.FeatureDescription;
 import org.apache.uima.lucas.indexer.types.test.Annotation1;
 import org.apache.uima.lucas.indexer.types.test.FeatureStructure1;
 import org.apache.uima.lucas.indexer.types.test.FeatureStructure2;
+import org.apache.uima.jcas.JCas;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
 import org.junit.Before;
 import org.junit.Test;
 
-public class AnnotationTokenStreamBuilderTest {
-  private final static String READER_DESCRIPTOR =
-          "src/test/resources/AnnotationTokenStreamTestDummyCollectionReader.xml";
-
-  private static final String DOCUMENT_TEXT = "token1 token2 token3";
 
-  private CollectionReader reader;
-
-  private JCas cas;
-
-  private AnnotationDescription annotationDescription;
-
-  private AnnotationTokenStreamBuilder annotationTokenStreamBuilder;
-
-  private Type annotation1Type;
-
-  private Annotation1 annotation1;
-
-  @Before
-  public void setUp() throws Exception {
-    CollectionReaderDescription readerDescription =
-            (CollectionReaderDescription) UIMAFramework.getXMLParser()
-                    .parseCollectionReaderDescription(new XMLInputSource(READER_DESCRIPTOR));
-    reader = UIMAFramework.produceCollectionReader(readerDescription);
-    cas = CasCreationUtils.createCas(reader.getProcessingResourceMetaData()).getJCas();
-    cas.setDocumentText(DOCUMENT_TEXT);
-    annotation1 = new Annotation1(cas);
-    annotation1.setBegin(0);
-    annotation1.setEnd(6);
-    annotation1.addToIndexes();
-
-    annotation1Type = annotation1.getType();
-    annotationDescription = new AnnotationDescription(annotation1Type.getName());
-    annotationTokenStreamBuilder = new AnnotationTokenStreamBuilder();
-  }
-
-  @Test
-  public void testBuildCoveredTextAnnotationTokenStream() throws Exception {
-    AnnotationTokenStream annotationTokenStream =
-            annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
-    assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
-    assertEquals(0, annotationTokenStream.getFeatureNames().size());
-    assertEquals(0, annotationTokenStream.getFeatureFormats().size());
-    assertEquals("token1", annotationTokenStream.next(new Token()).term());
-  }
-
-  @Test
-  public void testBuildFeatureAnnotationTokenStream() throws Exception {
-    annotation1.setFeatureString("token1Feature1");
-    annotationDescription.getFeatureDescriptions().add(new FeatureDescription("featureString"));
-    AnnotationTokenStream annotationTokenStream =
-            annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
-    assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
-    assertEquals(1, annotationTokenStream.getFeatureNames().size());
-    assertEquals(0, annotationTokenStream.getFeatureFormats().size());
-    assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
-  }
-
-  @Test
-  public void testBuildFeatureAnnotationTokenStreamWithFormat() throws Exception {
-    annotation1.setFeatureInteger(3);
-
-    FeatureDescription featureDescription = new FeatureDescription("featureInteger");
-    featureDescription.setNumberFormat("##");
-
-    annotationDescription.getFeatureDescriptions().add(featureDescription);
-    AnnotationTokenStream annotationTokenStream =
-            annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
-    assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
-    assertEquals(1, annotationTokenStream.getFeatureNames().size());
-    assertEquals(1, annotationTokenStream.getFeatureFormats().size());
-    assertTrue(annotationTokenStream.getFeatureFormats().get("featureInteger") instanceof DecimalFormat);
-    assertEquals("3", annotationTokenStream.next(new Token()).term());
-  }
-
-  @Test
-  public void testBuildFeaturePathAnnotationTokenStream() throws Exception {
-    FeatureStructure1 featureStructure1 = new FeatureStructure1(cas);
-    featureStructure1.setFeature1("token1Feature1");
-    FeatureStructure2 featureStructure2 = new FeatureStructure2(cas);
-    featureStructure2.setFeature1("token1Feature3Feature1");
-    featureStructure1.setFeature3(featureStructure2);
-    annotation1.setFeatureStructure1(featureStructure1);
-
-    annotationDescription.getFeatureDescriptions().add(new FeatureDescription("feature1"));
-    annotationDescription.setFeaturePath("featureStructure1");
-    AnnotationTokenStream annotationTokenStream =
-            annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
-    assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
-    assertEquals(1, annotationTokenStream.getFeatureNames().size());
-    assertEquals(0, annotationTokenStream.getFeatureFormats().size());
-    assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
-  }
+public class AnnotationTokenStreamBuilderTest {
+	private final static String READER_DESCRIPTOR = "src/test/resources/AnnotationTokenStreamTestDummyCollectionReader.xml";
+	private static final String DOCUMENT_TEXT = "token1 token2 token3";
+	private CollectionReader reader;
+	private JCas cas;
+	private AnnotationDescription annotationDescription;
+	private AnnotationTokenStreamBuilder annotationTokenStreamBuilder;
+	private Type annotation1Type;
+	private Annotation1 annotation1;
+	
+	@Before
+	public  void setUp() throws Exception {
+		CollectionReaderDescription readerDescription = (CollectionReaderDescription) UIMAFramework.getXMLParser().parseCollectionReaderDescription(new XMLInputSource(READER_DESCRIPTOR));
+		reader = UIMAFramework.produceCollectionReader(readerDescription);
+		cas = CasCreationUtils.createCas(reader.getProcessingResourceMetaData()).getJCas();
+		cas.setDocumentText(DOCUMENT_TEXT);
+		annotation1 = new Annotation1(cas);
+		annotation1.setBegin(0);
+		annotation1.setEnd(6);
+		annotation1.addToIndexes();
+	
+		annotation1Type = annotation1.getType();
+		annotationDescription = new AnnotationDescription();
+		annotationDescription.setType(annotation1Type.getName());
+		annotationTokenStreamBuilder = new AnnotationTokenStreamBuilder();
+	}
+	
+	@Test
+	public void testBuildCoveredTextAnnotationTokenStream() throws Exception{
+		AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+		assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+		assertEquals(0, annotationTokenStream.getFeatureNames().size());
+		assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+		assertEquals("token1", annotationTokenStream.next(new Token()).term());
+	}
+	
+	@Test
+	public void testBuildFeatureAnnotationTokenStream() throws Exception{
+		annotation1.setFeatureString("token1Feature1");
+		annotationDescription.getFeatureDescriptions().add(new FeatureDescription("featureString"));
+		AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+		assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+		assertEquals(1, annotationTokenStream.getFeatureNames().size());
+		assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+		assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
+	}
+
+	@Test
+	public void testBuildFeatureAnnotationTokenStreamWithFormat() throws Exception{
+		annotation1.setFeatureInteger(3);
+		
+		FeatureDescription featureDescription = new FeatureDescription("featureInteger");
+		featureDescription.setNumberFormat("##");
+		
+		annotationDescription.getFeatureDescriptions().add(featureDescription);
+		AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+		assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+		assertEquals(1, annotationTokenStream.getFeatureNames().size());
+		assertEquals(1, annotationTokenStream.getFeatureFormats().size());
+		assertTrue(annotationTokenStream.getFeatureFormats().get("featureInteger") instanceof DecimalFormat);
+		assertEquals("3", annotationTokenStream.next(new Token()).term());
+	}
+	
+	@Test
+	public void testBuildFeaturePathAnnotationTokenStream() throws Exception{
+		FeatureStructure1 featureStructure1 = new FeatureStructure1(cas);
+		featureStructure1.setFeature1("token1Feature1");
+		FeatureStructure2 featureStructure2 = new FeatureStructure2(cas);
+		featureStructure2.setFeature1("token1Feature3Feature1");
+		featureStructure1.setFeature3(featureStructure2);
+		annotation1.setFeatureStructure1(featureStructure1);
+		
+		annotationDescription.getFeatureDescriptions().add(new FeatureDescription("feature1"));
+		annotationDescription.setFeaturePath("featureStructure1");
+		AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+		assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+		assertEquals(1, annotationTokenStream.getFeatureNames().size());
+		assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+		assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
+	}
 
+	
+	
 }

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -36,31 +36,28 @@
 
 public class DocumentBuilderTest {
 
-  private Field field1;
-
-  private Field field2;
-
-  private DocumentBuilder documentBuilder;
-
-  private Collection<Field> fields;
-
-  @Before
-  public void setUp() {
-    documentBuilder = new DocumentBuilder();
-
-    field1 = new Field("field1", createMock(TokenStream.class));
-    field2 = new Field("field2", createMock(TokenStream.class));
-    fields = new ArrayList<Field>();
-    fields.add(field1);
-    fields.add(field2);
-  }
-
-  @Test
-  public void testCreateDocument() throws CASException, IOException {
-    Document document = documentBuilder.createDocument(fields);
-
-    assertEquals(field1, document.getField("field1"));
-    assertEquals(field2, document.getField("field2"));
-  }
-
+	private Field field1;
+	private Field field2;
+	private DocumentBuilder documentBuilder;
+	private Collection<Field> fields;
+
+	@Before
+	public void setUp(){
+		documentBuilder = new DocumentBuilder();
+		
+		field1 = new Field("field1", createMock(TokenStream.class));
+		field2 = new Field("field2", createMock(TokenStream.class));
+		fields = new ArrayList<Field>();		 
+		fields.add(field1);
+		fields.add(field2);
+	}
+	
+	@Test
+	public void testCreateDocument() throws CASException, IOException{
+		Document document = documentBuilder.createDocument(fields);
+		
+		assertEquals(field1, document.getField("field1"));
+		assertEquals(field2, document.getField("field2"));
+	}
+	
 }

Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+
+public class DummyTokenFilterFactory implements TokenFilterFactory {
+
+  public TokenFilter createTokenFilter(TokenStream tokenStream, Properties properties) {
+    
+    return new LowerCaseFilter(tokenStream);
+  }
+
+  public void preloadResources(Properties properties) throws IOException {
+  }
+
+}

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,13 +19,18 @@
 
 package org.apache.uima.lucas.indexer;
 
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.isA;
 import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 
@@ -33,11 +38,10 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Field;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
-import org.apache.uima.lucas.indexer.FieldBuilder;
-import org.apache.uima.lucas.indexer.FieldDescription;
 import org.apache.uima.lucas.indexer.analysis.TokenStreamConcatenator;
 import org.apache.uima.lucas.indexer.analysis.TokenStreamMerger;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
+import org.apache.uima.lucas.indexer.mapping.FieldDescription;
 import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
 import org.junit.Before;
 import org.junit.Test;
@@ -46,298 +50,351 @@
 
 public class FieldBuilderTest {
 
-  private FieldBuilder fieldBuilder;
-
-  private FieldDescription fieldDescription;
-
-  private AnnotationDescription annotationDescription1;
-
-  private AnnotationDescription annotationDescription2;
-
-  private JCas cas;
-
-  private TokenStream tokenStream1;
-
-  private TokenStream tokenStream2;
-
-  private List<TokenStream> tokenStreams;
-
-  @Before
-  public void setUp() {
-    annotationDescription1 = new AnnotationDescription("uima.cas.Annotation");
-    annotationDescription2 = new AnnotationDescription("uima.cas.Annotation");
-    fieldBuilder = new FieldBuilder();
-    cas = createMock(JCas.class);
-
-    Collection<Token> tokens1 = new ArrayList<Token>();
-    tokens1.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
-    tokens1.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
-    tokens1.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
-    Collection<Token> tokens2 = new ArrayList<Token>();
-    tokens2.add(new Token("token4".toCharArray(), 0, 6, 0, 6));
-    tokens2.add(new Token("token5".toCharArray(), 0, 6, 7, 13));
-    tokens2.add(new Token("token6".toCharArray(), 0, 6, 14, 20));
-
-    tokenStream1 = new CollectionTokenStream(tokens1);
-    tokenStream2 = new CollectionTokenStream(tokens2);
-
-    tokenStreams = Lists.newArrayList(tokenStream1, tokenStream2);
-
-    fieldDescription = new FieldDescription("field1");
-    fieldDescription.getAnnotationDescriptions().add(annotationDescription1);
-    fieldDescription.getAnnotationDescriptions().add(annotationDescription2);
-  }
-
-  @Test
-  public void testCreateFieldConcatenated() throws Exception {
-
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.tokenStreamValue() instanceof TokenStreamConcatenator);
-
-  }
-
-  @Test
-  public void testCreateFieldMerged() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.tokenStreamValue() instanceof TokenStreamMerger);
-  }
-
-  @Test
-  public void testCreateFieldNoIndex() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    assertEquals(0, fields.size());
-  }
-
-  @Test
-  public void testCreateFieldNoNorms() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.getOmitNorms());
-    assertTrue(field1.isIndexed());
-    assertFalse(field1.isStored());
-  }
-
-  @Test
-  public void testCreateFieldNoTF() throws Exception {
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_TF);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.getOmitTf());
-    assertFalse(field1.getOmitNorms());
-    assertTrue(field1.isIndexed());
-    assertFalse(field1.isStored());
-  }
-
-  @Test
-  public void testCreateFieldNoNormsTF() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS_TF);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.getOmitTf());
-    assertTrue(field1.getOmitNorms());
-    assertTrue(field1.isIndexed());
-    assertFalse(field1.isStored());
-  }
-
-  @Test
-  public void testCreateFieldTermVector() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_YES);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertFalse(field1.isStoreOffsetWithTermVector());
-    assertTrue(field1.isTermVectorStored());
-    assertFalse(field1.isStorePositionWithTermVector());
-  }
-
-  @Test
-  public void testCreateFieldTermVectorOffset() throws Exception {
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_OFFSETS);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.isStoreOffsetWithTermVector());
-    assertTrue(field1.isTermVectorStored());
-    assertFalse(field1.isStorePositionWithTermVector());
-  }
-
-  @Test
-  public void testCreateFieldTermVectorPositions() throws Exception {
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertFalse(field1.isStoreOffsetWithTermVector());
-    assertTrue(field1.isTermVectorStored());
-    assertTrue(field1.isStorePositionWithTermVector());
-  }
-
-  @Test
-  public void testCreateFieldTermVectorOffsetPositions() throws Exception {
-
-    fieldDescription.setMerge(true);
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertTrue(field1.isStoreOffsetWithTermVector());
-    assertTrue(field1.isTermVectorStored());
-    assertTrue(field1.isStorePositionWithTermVector());
-  }
-
-  @Test
-  public void testCreateFieldIndexStored() throws Exception {
-
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    assertEquals(4, fields.size());
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertFalse(field1.isIndexed());
-    assertTrue(field1.isStored());
-    assertEquals("token1", field1.stringValue());
-
-    Field field2 = fieldIterator.next();
-    assertEquals("field1", field2.name());
-    assertFalse(field2.isIndexed());
-    assertTrue(field2.isStored());
-    assertEquals("token2", field2.stringValue());
-
-    Field field3 = fieldIterator.next();
-    assertEquals("field1", field3.name());
-    assertFalse(field3.isIndexed());
-    assertTrue(field3.isStored());
-    assertEquals("token3", field3.stringValue());
-
-    Field field4 = fieldIterator.next();
-    assertEquals("field1", field4.name());
-    assertTrue(field4.isIndexed());
-    assertFalse(field4.isStored());
-  }
-
-  @Test
-  public void testCreateFieldIndexStoredDelimiter() throws Exception {
-
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
-    fieldDescription.setDelimiter(" ");
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    assertEquals(2, fields.size());
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertFalse(field1.isIndexed());
-    assertTrue(field1.isStored());
-    assertEquals("token1 token2 token3", field1.stringValue());
-
-    Field field2 = fieldIterator.next();
-    assertEquals("field1", field2.name());
-    assertTrue(field2.isIndexed());
-    assertFalse(field2.isStored());
-  }
-
-  @Test
-  public void testCreateFieldIndexStoredCompress() throws Exception {
-    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
-    fieldDescription.setStored(FieldBuilder.FIELD_STORE_COMPRESS);
-    tokenStreams.remove(1);
-
-    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
-    assertEquals(4, fields.size());
-    Iterator<Field> fieldIterator = fields.iterator();
-    Field field1 = fieldIterator.next();
-    assertEquals("field1", field1.name());
-    assertFalse(field1.isIndexed());
-    assertTrue(field1.isStored());
-    assertTrue(field1.isCompressed());
-    assertEquals("token1", field1.stringValue());
-
-    Field field2 = fieldIterator.next();
-    assertEquals("field1", field2.name());
-    assertFalse(field2.isIndexed());
-    assertTrue(field2.isStored());
-    assertTrue(field2.isCompressed());
-    assertEquals("token2", field2.stringValue());
-
-    Field field3 = fieldIterator.next();
-    assertEquals("field1", field3.name());
-    assertFalse(field3.isIndexed());
-    assertTrue(field3.isStored());
-    assertTrue(field3.isCompressed());
-    assertEquals("token3", field3.stringValue());
-
-    Field field4 = fieldIterator.next();
-    assertEquals("field1", field4.name());
-    assertTrue(field4.isIndexed());
-    assertFalse(field4.isStored());
-  }
+	private FieldBuilder fieldBuilder;
+	private FieldDescription fieldDescription;
+	private AnnotationDescription annotationDescription1;
+	private AnnotationDescription annotationDescription2;
+	private JCas cas;
+	
+	private TokenStream tokenStream1;
+	private TokenStream tokenStream2;
+	private List<TokenStream> tokenStreams;
+	private FilterBuilder filterBuilder;
+	
+	@Before
+	public void setUp(){
+		annotationDescription1= new AnnotationDescription("uima.cas.Annotation");		
+		annotationDescription2= new AnnotationDescription("uima.cas.Annotation");
+		filterBuilder = createMock(FilterBuilder.class);
+		fieldBuilder = new FieldBuilder(filterBuilder);
+		cas = createMock(JCas.class);
+
+		Collection<Token> tokens1 = new ArrayList<Token>();
+		tokens1.add(new Token("token1".toCharArray(),0,6,0,6));
+		tokens1.add(new Token("token2".toCharArray(),0,6,7,13));
+		tokens1.add(new Token("token3".toCharArray(),0,6,14,20));
+
+		Collection<Token> tokens2 = new ArrayList<Token>();
+		tokens2.add(new Token("token4".toCharArray(),0,6,0,6));
+		tokens2.add(new Token("token5".toCharArray(),0,6,7,13));
+		tokens2.add(new Token("token6".toCharArray(),0,6,14,20));
+
+		tokenStream1 = new CollectionTokenStream(tokens1);
+		tokenStream2 = new CollectionTokenStream(tokens2);
+
+		tokenStreams = Lists.newArrayList(tokenStream1, tokenStream2);
+		
+		fieldDescription = new FieldDescription("field1");
+		fieldDescription.getAnnotationDescriptions().add(annotationDescription1);
+		fieldDescription.getAnnotationDescriptions().add(annotationDescription2);
+		fieldDescription.setFilterDescriptions(Collections.EMPTY_LIST);
+	}
+	
+	@Test
+	public void testCreateFieldConcatenated() throws Exception{
+		
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+
+		TokenStream tokenStream = createMock(TokenStream.class);
+		expect(filterBuilder.filter(isA(TokenStreamConcatenator.class), isA(Collection.class))).andReturn(tokenStream);
+		replay(filterBuilder);
+		
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		verify(filterBuilder);
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertEquals(tokenStream, field1.tokenStreamValue());
+
+	}
+	
+	@Test
+	public void testCreateFieldMerged() throws Exception{
+		
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+    TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+    
+    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+    assertEquals(tokenStream, field1.tokenStreamValue());
+	}
+
+	@Test
+	public void testCreateFieldNoIndex() throws Exception{
+		
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO);
+		
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		
+		assertEquals(0, fields.size());
+	}
+
+	@Test
+	public void testCreateFieldNoNorms() throws Exception{
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS);
+
+		TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+		
+    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		
+    verify(filterBuilder);
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertTrue(field1.getOmitNorms());
+		assertTrue(field1.isIndexed());
+		assertFalse(field1.isStored());
+	}
+	
+	@Test
+	public void testCreateFieldNoTF() throws Exception{
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_TF);
+		
+		TokenStream tokenStream = createMock(TokenStream.class);
+		expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+		replay(filterBuilder);
+		
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		verify(filterBuilder);
+		
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertTrue(field1.getOmitTf());
+		assertFalse(field1.getOmitNorms());
+		assertTrue(field1.isIndexed());
+		assertFalse(field1.isStored());
+	}
+	
+	@Test
+	public void testCreateFieldNoNormsTF() throws Exception{
+		
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS_TF);
+
+		TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+		
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		verify(filterBuilder);		
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertTrue(field1.getOmitTf());
+		assertTrue(field1.getOmitNorms());
+		assertTrue(field1.isIndexed());
+		assertFalse(field1.isStored());
+	}
+	
+	@Test
+	public void testCreateFieldTermVector() throws Exception{
+		
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_YES);
+		tokenStreams.remove(1);
+
+    TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+    
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertFalse(field1.isStoreOffsetWithTermVector());
+		assertTrue(field1.isTermVectorStored());
+		assertFalse(field1.isStorePositionWithTermVector());
+	}
+	
+	@Test
+	public void testCreateFieldTermVectorOffset() throws Exception{
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_OFFSETS);
+		tokenStreams.remove(1);
+
+    TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+    
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertTrue(field1.isStoreOffsetWithTermVector());
+		assertTrue(field1.isTermVectorStored());
+		assertFalse(field1.isStorePositionWithTermVector());
+	}
+	
+	@Test
+	public void testCreateFieldTermVectorPositions() throws Exception{
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS);
+		tokenStreams.remove(1);
+
+		TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		verify(filterBuilder);
+		
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertFalse(field1.isStoreOffsetWithTermVector());
+		assertTrue(field1.isTermVectorStored());
+		assertTrue(field1.isStorePositionWithTermVector());
+	}
+
+	@Test
+	public void testCreateFieldTermVectorOffsetPositions() throws Exception{
+
+		fieldDescription.setMerge(true);
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
+		tokenStreams.remove(1);
+
+    TokenStream tokenStream = createMock(TokenStream.class);
+    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+    replay(filterBuilder);
+
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+    
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertTrue(field1.isStoreOffsetWithTermVector());
+		assertTrue(field1.isTermVectorStored());
+		assertTrue(field1.isStorePositionWithTermVector());
+	}
+
+	
+	@Test
+	public void testCreateFieldIndexStored() throws Exception{
+		
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
+		tokenStreams.remove(1);
+
+    expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+    replay(filterBuilder);
+		
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+    
+		assertEquals(4, fields.size());
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertFalse(field1.isIndexed());
+		assertTrue(field1.isStored());
+		assertEquals("token1", field1.stringValue());
+
+		Field field2 = fieldIterator.next();
+		assertEquals("field1", field2.name());
+		assertFalse(field2.isIndexed());
+		assertTrue(field2.isStored());
+		assertEquals("token2", field2.stringValue());
+
+		Field field3 = fieldIterator.next();
+		assertEquals("field1", field3.name());
+		assertFalse(field3.isIndexed());
+		assertTrue(field3.isStored());
+		assertEquals("token3", field3.stringValue());
+
+		Field field4 = fieldIterator.next();
+		assertEquals("field1", field4.name());
+		assertTrue(field4.isIndexed());
+		assertFalse(field4.isStored());
+	}
+	
+	@Test
+	public void testCreateFieldIndexStoredDelimiter() throws Exception{
+		
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
+		fieldDescription.setDelimiter(" ");
+		tokenStreams.remove(1);
+		
+    expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+    replay(filterBuilder);
+    
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+		verify(filterBuilder);
+		
+		assertEquals(2, fields.size());
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertFalse(field1.isIndexed());
+		assertTrue(field1.isStored());
+		assertEquals("token1 token2 token3", field1.stringValue());
+
+		Field field2 = fieldIterator.next();
+		assertEquals("field1", field2.name());
+		assertTrue(field2.isIndexed());
+		assertFalse(field2.isStored());
+	}
+	
+	@Test
+	public void testCreateFieldIndexStoredCompress() throws Exception{
+		fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+		fieldDescription.setStored(FieldBuilder.FIELD_STORE_COMPRESS);
+		tokenStreams.remove(1);
+		
+    expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+    replay(filterBuilder);
+
+		Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+    verify(filterBuilder);
+    
+		assertEquals(4, fields.size());
+		Iterator<Field> fieldIterator = fields.iterator();
+		Field field1 = fieldIterator.next();
+		assertEquals("field1", field1.name());
+		assertFalse(field1.isIndexed());
+		assertTrue(field1.isStored());
+		assertTrue(field1.isCompressed());
+		assertEquals("token1", field1.stringValue());
+
+		Field field2 = fieldIterator.next();
+		assertEquals("field1", field2.name());
+		assertFalse(field2.isIndexed());
+		assertTrue(field2.isStored());
+		assertTrue(field2.isCompressed());
+		assertEquals("token2", field2.stringValue());
+
+		Field field3 = fieldIterator.next();
+		assertEquals("field1", field3.name());
+		assertFalse(field3.isIndexed());
+		assertTrue(field3.isStored());
+		assertTrue(field3.isCompressed());
+		assertEquals("token3", field3.stringValue());
+
+		Field field4 = fieldIterator.next();
+		assertEquals("field1", field4.name());
+		assertTrue(field4.isIndexed());
+		assertFalse(field4.isStored());
+	}
 }

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,163 +19,102 @@
 
 package org.apache.uima.lucas.indexer;
 
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
-import org.apache.uima.lucas.indexer.FilterBuilder;
-import org.apache.uima.lucas.indexer.analysis.AdditionTokenFilter;
-import org.apache.uima.lucas.indexer.analysis.HypernymTokenFilter;
-import org.apache.uima.lucas.indexer.analysis.PositionFilter;
-import org.apache.uima.lucas.indexer.analysis.ReplaceFilter;
-import org.apache.uima.lucas.indexer.analysis.SplitterFilter;
-import org.apache.uima.lucas.indexer.analysis.UniqueFilter;
-import org.apache.uima.lucas.indexer.analysis.UpperCaseTokenFilter;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+import org.apache.uima.lucas.indexer.mapping.FilterDescription;
 import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
 import org.junit.Before;
 import org.junit.Test;
 
-import com.google.common.collect.Lists;
 
 public class FilterBuilderTest {
 
-  private static final String PORTER = "Porter";
-
-  private static final String SUFFIX = "suffix";
-
-  private static final String PREFIX = "prefix";
-
-  private static final String MAP_FILE_NAME = "mapfile.txt";
-
+	private static final String LOWER_CASE_FACTORY_ID = "lowerCaseFactory";
   private FilterBuilder filterBuilder;
-
-  private AnnotationDescription annotationDescription;
-
-  private String[] stopwords;
-
-  private Map<String, Map<String, String>> tokenMappings;
-
-  private Map<String, List<String>> hypernyms;
-
-  private TokenStream tokenStream;
-
-  private Map<String, String> tokenMapping;
-
-  @Before
-  public void setUp() {
-    annotationDescription = new AnnotationDescription(null);
-    Collection<Token> tokens = new ArrayList<Token>();
-    tokens.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
-    tokens.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
-    tokens.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
-    tokenMappings = new HashMap<String, Map<String, String>>();
-    tokenMapping = new HashMap<String, String>();
-    tokenMappings.put(MAP_FILE_NAME, tokenMapping);
-
-    tokenStream = new CollectionTokenStream(tokens);
-    stopwords = new String[] { "na", "und", "nu" };
-    hypernyms = new HashMap<String, List<String>>();
-    hypernyms.put("token1", Lists.newArrayList("token111", "token11", "token1"));
-
-    filterBuilder = new FilterBuilder(stopwords, hypernyms, tokenMappings);
-  }
-
-  @Test
-  public void testFilterMapping() throws Exception {
-    annotationDescription.setMappingFile(MAP_FILE_NAME);
-    ReplaceFilter replaceFilter =
-            (ReplaceFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(tokenMapping, replaceFilter.getMapping());
-  }
-
-  @Test
-  public void testFilterPosition() throws Exception {
-    annotationDescription.setPosition(FilterBuilder.POSITION_FIRST);
-    PositionFilter positionFilter =
-            (PositionFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(PositionFilter.FIRST_POSITION, positionFilter.getPosition());
-
-    annotationDescription.setPosition(FilterBuilder.POSITION_LAST);
-    positionFilter = (PositionFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(PositionFilter.LAST_POSITION, positionFilter.getPosition());
-  }
-
-  @Test
-  public void testFilterAddition() throws Exception {
-    annotationDescription.setPrefix(PREFIX);
-    AdditionTokenFilter additionFilter =
-            (AdditionTokenFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(AdditionTokenFilter.PREFIX, additionFilter.getPosition());
-    assertEquals(PREFIX, additionFilter.getAddition());
-
-    annotationDescription.setPostfix(SUFFIX);
-    additionFilter = (AdditionTokenFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(PositionFilter.LAST_POSITION, additionFilter.getPosition());
-    assertEquals(SUFFIX, additionFilter.getAddition());
+	private TokenStream tokenStream;
+  
+	
+	@Before
+	public void setUp(){
+		Collection<Token> tokens = new ArrayList<Token>();
+		tokens.add(new Token("token1".toCharArray(),0,6,0,6));
+		tokens.add(new Token("token2".toCharArray(),0,6,7,13));
+		tokens.add(new Token("token3".toCharArray(),0,6,14,20));
+
+		tokenStream = new CollectionTokenStream(tokens);
+		
+		filterBuilder = new FilterBuilder(new HashMap<String, TokenFilterFactory>());
+	}
+	
+	@Test
+	public void testFilterWithoutFactory() throws Exception{
+	  Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+	  filterDescriptions.add(new FilterDescription(LowerCaseFilter.class.getCanonicalName(), null, null, false, null));
+	  
+	  TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+	  assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+	}
+
+  @Test
+  public void testFilterWithFactory() throws Exception{
+    Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+    filterDescriptions.add(new FilterDescription(null, DummyTokenFilterFactory.class.getCanonicalName(), null, false, null));
+    
+    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+    assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+  }
+
+  @Test
+  public void testFilterWithNamedAndCachedFactory() throws Exception{
+    Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+    filterDescriptions.add(new FilterDescription(null, DummyTokenFilterFactory.class.getCanonicalName(), LOWER_CASE_FACTORY_ID, true, null));
+    
+    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+    assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+    Map<String, TokenFilterFactory> cachedFactories = filterBuilder.getCachedFactories();
+    assertEquals(LOWER_CASE_FACTORY_ID, cachedFactories.keySet().iterator().next());
+    assertTrue(cachedFactories.get(LOWER_CASE_FACTORY_ID) instanceof DummyTokenFilterFactory);
+
+    // test caching
+    filterDescriptions = new ArrayList<FilterDescription>();
+    filterDescriptions.add(new FilterDescription(null, null, LOWER_CASE_FACTORY_ID, true, null));
+    TokenFilterFactory factory = createMock(TokenFilterFactory.class);
+    expect(factory.createTokenFilter(tokenStream, null)).andReturn(null);
+    replay(factory);
+    
+    cachedFactories.put(LOWER_CASE_FACTORY_ID, factory);
+    filterBuilder.filter(tokenStream, filterDescriptions);
+    verify(factory);
+  }
+	
+  @Test
+  public void testFilterWithPredefinedFactory() throws Exception{
+    Map<String, TokenFilterFactory> predefinedFactories = new HashMap<String, TokenFilterFactory>();
+    
+    // verify that a factory registered via the constructor is used
+    Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+    filterDescriptions.add(new FilterDescription(null, null, LOWER_CASE_FACTORY_ID, true, null));
+    TokenFilterFactory factory = createMock(TokenFilterFactory.class);
+    expect(factory.createTokenFilter(tokenStream, null)).andReturn(null);
+    replay(factory);
+    
+    predefinedFactories.put(LOWER_CASE_FACTORY_ID, factory);
+    filterBuilder = new FilterBuilder(predefinedFactories);
+    filterBuilder.filter(tokenStream, filterDescriptions);
+    verify(factory);
   }
-
-  @Test
-  public void testFilterSplit() throws Exception {
-    annotationDescription.setSplitString(" ");
-    SplitterFilter splitterFilter =
-            (SplitterFilter) filterBuilder.filter(tokenStream, annotationDescription);
-    assertEquals(" ", splitterFilter.getSplitString());
-  }
-
-  @Test
-  public void testFilterLowercase() throws Exception {
-    annotationDescription.setLowercase(true);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof LowerCaseFilter);
-  }
-
-  @Test
-  public void testFilterUpperCase() throws Exception {
-    annotationDescription.setUppercase(true);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof UpperCaseTokenFilter);
-  }
-
-  @Test
-  public void testFilterStopFilter() throws Exception {
-    annotationDescription.setStopwordRemove(true);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof StopFilter);
-  }
-
-  @Test
-  public void testFilterHypernyms() throws Exception {
-    annotationDescription.setAddHypernyms(true);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof HypernymTokenFilter);
-    HypernymTokenFilter hypernymTokenFilter = (HypernymTokenFilter) filteredTokenStream;
-    assertEquals(hypernyms, hypernymTokenFilter.getHypernyms());
-  }
-
-  @Test
-  public void testFilterPorter() throws Exception {
-    annotationDescription.setSnowballFilter(PORTER);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof SnowballFilter);
-  }
-
-  @Test
-  public void testFilterUnique() throws Exception {
-    annotationDescription.setUnique(true);
-    TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
-    assertTrue(filteredTokenStream instanceof UniqueFilter);
-  }
-
 }

Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer;
+
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.management.ManagementFactory;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class IndexWriterProviderImplTest {
+
+  private static final String TEST_INDEX = "src/test/resources/test-index";
+  private static final String RESOURCES_TEST_INDEX_PROPERTIES = "src/test/resources/IndexWriter.properties";
+  private IndexWriterProviderImpl indexWriterProviderImpl;
+  private DataResource dataResource;
+  private InputStream propertiesInputStream;
+  
+  @Before
+  public void setUp() throws IOException{
+    indexWriterProviderImpl = new IndexWriterProviderImpl();
+    dataResource = createMock(DataResource.class);
+    FileInputStream fileInputStream = new FileInputStream(RESOURCES_TEST_INDEX_PROPERTIES);
+    propertiesInputStream = new BufferedInputStream(fileInputStream);
+  }
+  
+  @After
+  public void tearDown() throws Exception{
+    
+    FSDirectory directory = (FSDirectory) indexWriterProviderImpl.getIndexWriter().getDirectory();
+    File directoryFile = directory.getFile();
+    
+    directory = FSDirectory.getDirectory(directoryFile);
+    IndexWriter.unlock(directory);
+    
+    for( String file: directory.list() )
+      directory.deleteFile(file);
+
+    directory.getFile().delete();
+  }
+  
+  @Test
+  public void testLoadData() throws IOException, ResourceInitializationException{
+    
+    expect(dataResource.getInputStream()).andReturn(propertiesInputStream);
+    replay(dataResource);
+    
+    indexWriterProviderImpl.load(dataResource);
+    IndexWriter indexWriter = indexWriterProviderImpl.getIndexWriter();
+    FSDirectory fsDirectory = (FSDirectory) indexWriter.getDirectory();
+    
+    String hostname = getHostName();
+    String pid = getPID();
+    
+    assertTrue(fsDirectory.getFile().getAbsolutePath().endsWith(TEST_INDEX +"-"+hostname+"-"+pid));
+    assertEquals(513, indexWriter.getRAMBufferSizeMB(), 0.5);
+    assertEquals(9999, indexWriter.getMaxFieldLength(), 0.5);
+  }
+   
+  protected String getPID(){
+    String id = ManagementFactory.getRuntimeMXBean().getName();
+    return id.substring(0, id.indexOf("@") );   
+  }
+  
+  public String getHostName(){
+    InetAddress address;
+    String hostName;
+    try {
+      address = InetAddress.getLocalHost();
+      hostName = address.getHostName();
+    } catch (UnknownHostException e) {
+      throw new IllegalStateException(e);
+    }       
+    return hostName;
+  }
+}

Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java Fri Jul 24 08:58:52 2009
@@ -28,47 +28,46 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
 import org.apache.uima.lucas.indexer.Tokenizer;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
 import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
 import org.junit.Before;
 import org.junit.Test;
 
-public class TokenizerTest {
-  private Tokenizer tokenizer;
-
-  private AnnotationDescription annotationDescription;
-
-  private TokenStream tokenStream;
-
-  @Before
-  public void setUp() {
-    tokenizer = new Tokenizer();
-    annotationDescription = new AnnotationDescription(null);
-    Collection<Token> tokens = new ArrayList<Token>();
-    tokens.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
-    tokens.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
-    tokens.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
-    tokenStream = new CollectionTokenStream(tokens);
-  }
 
-  @Test
-  public void testTokenizeWhiteSpace() throws Exception {
-    annotationDescription.setTokenizer(Tokenizer.TOKENIZER_WHITESPACE);
-    assertTrue(tokenizer.needsTokenization(annotationDescription));
-
-    TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
-    assertTrue(reTokenizedTokenStream instanceof WhitespaceTokenizer);
-  }
-
-  @Test
-  public void testTokenizeStandard() throws Exception {
-    annotationDescription.setTokenizer(Tokenizer.TOKENIZER_STANDARD);
-    assertTrue(tokenizer.needsTokenization(annotationDescription));
-
-    TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
-    assertTrue(reTokenizedTokenStream instanceof StandardTokenizer);
-  }
+public class TokenizerTest {
+	private Tokenizer tokenizer;
+	private AnnotationDescription annotationDescription;
+	private TokenStream tokenStream; 
+	
+	@Before
+	public void setUp(){
+		tokenizer = new Tokenizer();
+		annotationDescription = new AnnotationDescription(null);
+		Collection<Token> tokens = new ArrayList<Token>();
+		tokens.add(new Token("token1".toCharArray(),0,6,0,6));
+		tokens.add(new Token("token2".toCharArray(),0,6,7,13));
+		tokens.add(new Token("token3".toCharArray(),0,6,14,20));
+
+		tokenStream = new CollectionTokenStream(tokens);
+	}
+	
+	@Test
+	public void testTokenizeWhiteSpace() throws Exception{
+		annotationDescription.setTokenizer(Tokenizer.TOKENIZER_WHITESPACE);
+		assertTrue(tokenizer.needsTokenization(annotationDescription));
+		
+		TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
+		assertTrue(reTokenizedTokenStream instanceof WhitespaceTokenizer );
+	}
+	
+	@Test
+	public void testTokenizeStandard() throws Exception{
+		annotationDescription.setTokenizer(Tokenizer.TOKENIZER_STANDARD);
+		assertTrue(tokenizer.needsTokenization(annotationDescription));
+		
+		TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
+		assertTrue(reTokenizedTokenStream instanceof StandardTokenizer );
+	}
 
 }

Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.util.Properties;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.easymock.classextension.EasyMock.*;
+import static org.junit.Assert.*;
+
+public class AdditionFilterFactoryTest {
+
+	private static final String TEST_STRING = "test";
+	private AdditionFilterFactory additionFilterFactory;
+	private TokenStream tokenStream;
+	
+	@Before
+	public void setUp(){
+		tokenStream = createMock(TokenStream.class);
+		additionFilterFactory = new AdditionFilterFactory();
+	}
+	
+	@Test
+	public void testCreateTokenFilter() throws Exception{
+		Properties properties = new Properties();
+		properties.setProperty(AdditionFilterFactory.POSTFIX_POSITION, TEST_STRING);
+		
+		AdditionFilter additionFilter = (AdditionFilter) additionFilterFactory.createTokenFilter(tokenStream, properties);
+		
+		assertEquals(AdditionFilter.POSTFIX, additionFilter.getPosition());
+		assertEquals(TEST_STRING, additionFilter.getAddition());
+	}
+}

Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.AdditionFilter;
+import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class AdditionFilterTest {
+
+	@Test
+	public void testNext() throws Exception{		
+		Collection<Token> tokens = new ArrayList<Token>();
+		tokens.add(new Token("token1", 0, 6));
+		tokens.add(new Token("token2", 7, 13));
+		tokens.add(new Token("token3", 14, 20));
+		tokens.add(new Token("token4", 21, 27));
+		
+		TokenStream tokenStream = new CollectionTokenStream(tokens);
+		AdditionFilter filter = new AdditionFilter(tokenStream, "prefix_", AdditionFilter.PREFIX);
+		
+		Token next = filter.next();
+		assertEquals("prefix_token1", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("prefix_token2", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("prefix_token3", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("prefix_token4", new String(next.termBuffer(), 0, next.termLength()));		
+
+		tokens = new ArrayList<Token>();
+		tokens.add(new Token("token1", 0, 6));
+		tokens.add(new Token("token2", 7, 13));
+		tokens.add(new Token("token3", 14, 20));
+		tokens.add(new Token("token4", 21, 27));
+		
+		tokenStream = new CollectionTokenStream(tokens);
+		filter = new AdditionFilter(tokenStream, "_postfix", AdditionFilter.POSTFIX);
+		
+		next = filter.next();
+		assertEquals("token1_postfix", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("token2_postfix", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("token3_postfix", new String(next.termBuffer(), 0, next.termLength()));
+		next = filter.next();
+		assertEquals("token4_postfix", new String(next.termBuffer(), 0, next.termLength()));				
+	}
+}