You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2009/07/24 10:58:55 UTC
svn commit: r797373 [4/8] - in /incubator/uima/sandbox/trunk/Lucas: ./
docbook/ docbook/LuceneCASConsumerUserGuide/
docbook/LuceneCASConsumerUserGuide/images/
src/main/java/org/apache/uima/lucas/consumer/
src/main/java/org/apache/uima/lucas/indexer/ sr...
Modified: incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/main/resources/LuceneCASIndexer.xml Fri Jul 24 08:58:52 2009
@@ -21,66 +21,20 @@
<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
- <implementationName>de.julielab.jules.consumer.LuceneCASIndexer</implementationName>
+ <implementationName>org.apache.uima.consumer.LuceneCASIndexer</implementationName>
<processingResourceMetaData>
- <name>LuceneCASIndexerDescriptor</name>
+ <name>LuceneCASIndexer</name>
<description/>
- <version>2.0</version>
- <vendor>julielab</vendor>
+ <version>0.6</version>
+ <vendor>apache</vendor>
<configurationParameters>
<configurationParameter>
- <name>indexOutDir</name>
- <description>defines the output directory where the index should be written</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- </configurationParameter>
- <configurationParameter>
<name>mappingFile</name>
<description>path to the mapping file</description>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
- <configurationParameter>
- <name>stopwordFile</name>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>hypernymFile</name>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>tokenMappingFile</name>
- <description>Mapping file for replacement of tokens.</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>uniqueIndex</name>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>ramBufferSize</name>
- <description>Sets the ram buffer size of the index writer. See lucene docs for further information.</description>
- <type>Integer</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>compoundFileFormat</name>
- <description>Determines whether the index writer should use compound file format or not.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
@@ -89,50 +43,6 @@
<string>src/test/resources/lucas.xml</string>
</value>
</nameValuePair>
- <nameValuePair>
- <name>indexOutDir</name>
- <value>
- <string>src/test/resources/testIndex</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>stopwordFile</name>
- <value>
- <string>src/test/resources/stopwords.txt</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>hypernymFile</name>
- <value>
- <string>src/test/resources/hypernyms.txt</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>tokenMappingFile</name>
- <value>
- <array>
- <string>src/test/resources/tokenMapping.txt</string>
- </array>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>uniqueIndex</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>ramBufferSize</name>
- <value>
- <integer>512</integer>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>compoundFileFormat</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
</configurationParameterSettings>
<typeSystemDescription/>
<typePriorities/>
@@ -146,9 +56,26 @@
</capabilities>
<operationalProperties>
<modifiesCas>false</modifiesCas>
- <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</processingResourceMetaData>
- <resourceManagerConfiguration/>
+ <resourceManagerConfiguration>
+ <externalResources>
+ <externalResource>
+ <name>LucasIndexWriterProvider</name>
+ <description>Provides an index writer instance.</description>
+ <fileResourceSpecifier>
+ <fileUrl>file:src/test/resources/IndexWriter.properties</fileUrl>
+ </fileResourceSpecifier>
+ <implementationName>org.apache.uima.indexer.IndexWriterProviderImpl</implementationName>
+ </externalResource>
+ </externalResources>
+ <externalResourceBindings>
+ <externalResourceBinding>
+ <key>indexWriterProvider</key>
+ <resourceName>LucasIndexWriterProvider</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
</casConsumerDescription>
Modified: incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/main/resources/lucas.xsd Fri Jul 24 08:58:52 2009
@@ -62,44 +62,68 @@
<xs:enumeration value="first"/>
<xs:enumeration value="last"/>
</xs:restriction>
- </xs:simpleType>
+ </xs:simpleType>
+
+ <xs:element name="filters">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="filter" minOccurs="1" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="filter">
+ <xs:complexType>
+ <xs:attribute name="className" type="xs:string"/>
+ <xs:attribute name="factoryClassName" type="xs:string"/>
+ <xs:attribute name="reuseFactory" type="xs:boolean"/>
+ <xs:attribute name="name" type="xs:string"/>
+ <xs:anyAttribute processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="features">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="feature" minOccurs="1" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
<xs:element name="feature">
<xs:complexType>
<xs:attribute name="name" type="xs:string"/>
- <xs:attribute name="uppercase" type="xs:boolean"/>
- <xs:attribute name="lowercase" type="xs:boolean"/>
<xs:attribute name="numberFormat" type="xs:string"/>
</xs:complexType>
</xs:element>
+
+ <xs:element name="annotations">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
<xs:element name="annotation">
<xs:complexType>
<xs:sequence>
- <xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
+ <xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
+ <xs:element ref="features" minOccurs="1" maxOccurs="1"/>
</xs:sequence>
<xs:attribute name="type" type="xs:string"/>
<xs:attribute name="sofa" type="xs:string"/>
<xs:attribute name="featurePath" type="xs:string"/>
- <xs:attribute name="concatString" type="xs:string"/>
- <xs:attribute name="splitString" type="xs:string"/>
- <xs:attribute name="prefix" type="xs:string"/>
- <xs:attribute name="uppercase" type="xs:boolean"/>
- <xs:attribute name="lowercase" type="xs:boolean"/>
- <xs:attribute name="stopwordRemove" type="xs:boolean"/>
- <xs:attribute name="position" type="positionType"/>
- <xs:attribute name="addHypernyms" type="xs:boolean"/>
- <xs:attribute name="mappingFile" type="xs:string"/>
- <xs:attribute name="snowballFilter" type="xs:string"/>
- <xs:attribute name="unique" type="xs:boolean"/>
- <xs:attribute name="tokenizer" type="tokenizerType"/>
+ <xs:attribute name="tokenizer" type="tokenizerType"/>
+ <xs:attribute name="featureValueDelimiterString" type="xs:string"/>
</xs:complexType>
</xs:element>
<xs:element name="field">
<xs:complexType>
<xs:sequence>
- <xs:element ref="annotation" maxOccurs="unbounded"/>
+ <xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
+ <xs:element ref="annotations" minOccurs="1" maxOccurs="1"/>
</xs:sequence>
<xs:attribute name="name" type="xs:string"/>
<xs:attribute name="index" type="indexType"/>
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/consumer/LuceneCASIndexerTest.java Fri Jul 24 08:58:52 2009
@@ -19,146 +19,106 @@
package org.apache.uima.lucas.consumer;
-import static org.junit.Assert.assertArrayEquals;
+import static org.easymock.EasyMock.capture;
+import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
-import java.lang.management.ManagementFactory;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
+import java.io.IOException;
import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import java.util.Properties;
import org.apache.lucene.store.FSDirectory;
import org.apache.uima.UIMAFramework;
import org.apache.uima.collection.CasConsumerDescription;
-import org.apache.uima.lucas.consumer.LuceneCASIndexer;
-import org.apache.uima.lucas.indexer.FieldDescription;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+import org.apache.uima.lucas.indexer.mapping.FieldDescription;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
+import org.easymock.Capture;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-public class LuceneCASIndexerTest {
-
- private static final String TOKEN_MAPPING_TXT = "tokenMapping.txt";
-
- private static final String HYPERNYM_ID1 = "id1";
+import com.google.common.collect.Maps;
- private static final Object HYPERNYM_ID2 = "id2";
-
- private static final String[] STOP_WORDS = new String[] { "na", "und", "nu" };
+public class LuceneCASIndexerTest {
+ private static final String TEST_FILTER_ANNOTATION = "testFilterAnnotation";
+ private static final String TEST_FILTER_FIELD = "testFilterField";
private static final String FIELD_NAME = "annotation1";
+ private static final String DESCRIPTOR_FILE = "src/main/resources/LuceneCASIndexer.xml";
+ private static final String INDEX_DIRECTORY = "src/test/resources/test-index";
- private static final String WRITE_LOCK = "write.lock";
-
- private static final String DESCRIPTOR_FILE = "src/main/resources/LuceneCASIndexer.xml";
-
- private static final String INDEX_DIRECTORY = "src/test/resources/testIndex";
-
- private LuceneCASIndexer consumer;
-
- @Before
- public void setUp() throws Exception {
- CasConsumerDescription consumerDescription =
- (CasConsumerDescription) UIMAFramework.getXMLParser().parseCasConsumerDescription(
- new XMLInputSource(DESCRIPTOR_FILE));
- consumer = (LuceneCASIndexer) UIMAFramework.produceCasConsumer(consumerDescription);
- }
-
- @After
- public void tearDown() throws Exception {
-
- FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
- File directoryFile = directory.getFile();
- consumer.destroy();
-
- directory = FSDirectory.getDirectory(directoryFile);
-
- // directory.deleteFile(WRITE_LOCK);
- for (String file : directory.list())
- directory.deleteFile(file);
-
- directory.getFile().delete();
- }
-
- @Test
- public void testIndexOutDir() {
- FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
-
- String path = directory.getFile().getPath();
- assertTrue(path.contains(INDEX_DIRECTORY));
- }
-
- @Test
- public void testMappingFile() {
- Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
- assertEquals(1, fieldDescriptions.size());
- FieldDescription fieldDescription = fieldDescriptions.iterator().next();
- assertEquals(FIELD_NAME, fieldDescription.getName());
- assertEquals(2, fieldDescription.getAnnotationDescriptions().size());
- }
-
- @Test
- public void testStopwordFile() {
- String[] stopwords = consumer.getFilterBuilder().getStopwords();
- assertArrayEquals(STOP_WORDS, stopwords);
- }
-
- @Test
- public void testHypernymFile() {
- Map<String, List<String>> hypernyms = consumer.getFilterBuilder().getHypernyms();
- assertEquals(2, hypernyms.size());
- assertTrue(hypernyms.containsKey(HYPERNYM_ID1));
- assertTrue(hypernyms.containsKey(HYPERNYM_ID2));
- }
-
- @Test
- public void testTokenMappingFile() {
- Map<String, Map<String, String>> tokenMappings = consumer.getFilterBuilder().getMappings();
- assertTrue(tokenMappings.containsKey(TOKEN_MAPPING_TXT));
- assertEquals(2, tokenMappings.get(TOKEN_MAPPING_TXT).size());
- }
-
- @Test
- public void testUniqueIndex() {
- String hostname = getHostName();
- String pid = getPID();
-
- FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
-
- String path = directory.getFile().getPath();
- assertTrue(path.endsWith(INDEX_DIRECTORY + "-" + hostname + "-" + pid));
- }
-
- @Test
- public void testRamBufferSize() {
- assertEquals(512, consumer.getIndexWriter().getRAMBufferSizeMB(), 0);
- }
-
- @Test
- public void testCompoundFileFormat() {
- assertTrue(consumer.getIndexWriter().getUseCompoundFile());
- }
-
- protected String getPID() {
- String id = ManagementFactory.getRuntimeMXBean().getName();
- return id.substring(0, id.indexOf("@"));
- }
-
- public String getHostName() {
- InetAddress address;
- String hostName;
- try {
- address = InetAddress.getLocalHost();
- hostName = address.getHostName();
- } catch (UnknownHostException e) {
- throw new IllegalStateException(e);
- }
+ private LuceneCASIndexer consumer;
- return hostName;
- }
+ @Before
+ public void setUp() throws InvalidXMLException, IOException, ResourceInitializationException{
+
+ CasConsumerDescription consumerDescription = (CasConsumerDescription) UIMAFramework.getXMLParser().parseCasConsumerDescription(new XMLInputSource(DESCRIPTOR_FILE));
+ consumer = (LuceneCASIndexer) UIMAFramework.produceCasConsumer(consumerDescription);
+ }
+
+ @After
+ public void tearDown() throws Exception{
+ FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
+ File directoryFile = directory.getFile();
+ consumer.destroy();
+
+ directory = FSDirectory.getDirectory(directoryFile);
+
+ for( String file: directory.list() )
+ directory.deleteFile(file);
+
+ directory.getFile().delete();
+ }
+
+ @Test
+ public void testIndexOutDir(){
+ FSDirectory directory = (FSDirectory) consumer.getIndexWriter().getDirectory();
+
+ String path = directory.getFile().getPath();
+ assertTrue(path.contains(INDEX_DIRECTORY));
+ }
+
+ @Test
+ public void testMappingFile(){
+ Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
+ assertEquals(1, fieldDescriptions.size());
+ FieldDescription fieldDescription = fieldDescriptions.iterator().next();
+ assertEquals(FIELD_NAME, fieldDescription.getName());
+ assertEquals(2, fieldDescription.getAnnotationDescriptions().size());
+ }
+
+ @Test
+ public void testPreloadResources() throws IOException{
+ Collection<FieldDescription> fieldDescriptions = consumer.getFieldDescriptions();
+ TokenFilterFactory testFactoryField = createMock(TokenFilterFactory.class);
+ TokenFilterFactory testFactoryAnnotation = createMock(TokenFilterFactory.class);
+
+ Capture<Properties> propertiesCaptureField = new Capture<Properties>();
+ Capture<Properties> propertiesCaptureAnnotation = new Capture<Properties>();
+
+ testFactoryField.preloadResources(capture(propertiesCaptureField));
+ testFactoryAnnotation.preloadResources(capture(propertiesCaptureAnnotation));
+
+ replay(testFactoryField);
+ replay(testFactoryAnnotation);
+
+ consumer.preloadResources(fieldDescriptions, Maps.immutableBiMap(TEST_FILTER_ANNOTATION, testFactoryAnnotation,
+ TEST_FILTER_FIELD, testFactoryField));
+ verify(testFactoryField);
+ verify(testFactoryAnnotation);
+
+ Properties fieldFilterProperties = propertiesCaptureField.getValue();
+ assertEquals("value1", fieldFilterProperties.getProperty("key1"));
+
+ Properties annotationFilterProperties = propertiesCaptureAnnotation.getValue();
+ assertEquals("value2", annotationFilterProperties.getProperty("key2"));
+ }
+
}
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/AnnotationTokenStreamBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,8 +19,7 @@
package org.apache.uima.lucas.indexer;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
import java.text.DecimalFormat;
@@ -29,111 +28,101 @@
import org.apache.uima.cas.Type;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.CollectionReaderDescription;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
import org.apache.uima.lucas.indexer.AnnotationTokenStreamBuilder;
-import org.apache.uima.lucas.indexer.FeatureDescription;
import org.apache.uima.lucas.indexer.analysis.AnnotationTokenStream;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
+import org.apache.uima.lucas.indexer.mapping.FeatureDescription;
import org.apache.uima.lucas.indexer.types.test.Annotation1;
import org.apache.uima.lucas.indexer.types.test.FeatureStructure1;
import org.apache.uima.lucas.indexer.types.test.FeatureStructure2;
+import org.apache.uima.jcas.JCas;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;
import org.junit.Before;
import org.junit.Test;
-public class AnnotationTokenStreamBuilderTest {
- private final static String READER_DESCRIPTOR =
- "src/test/resources/AnnotationTokenStreamTestDummyCollectionReader.xml";
-
- private static final String DOCUMENT_TEXT = "token1 token2 token3";
- private CollectionReader reader;
-
- private JCas cas;
-
- private AnnotationDescription annotationDescription;
-
- private AnnotationTokenStreamBuilder annotationTokenStreamBuilder;
-
- private Type annotation1Type;
-
- private Annotation1 annotation1;
-
- @Before
- public void setUp() throws Exception {
- CollectionReaderDescription readerDescription =
- (CollectionReaderDescription) UIMAFramework.getXMLParser()
- .parseCollectionReaderDescription(new XMLInputSource(READER_DESCRIPTOR));
- reader = UIMAFramework.produceCollectionReader(readerDescription);
- cas = CasCreationUtils.createCas(reader.getProcessingResourceMetaData()).getJCas();
- cas.setDocumentText(DOCUMENT_TEXT);
- annotation1 = new Annotation1(cas);
- annotation1.setBegin(0);
- annotation1.setEnd(6);
- annotation1.addToIndexes();
-
- annotation1Type = annotation1.getType();
- annotationDescription = new AnnotationDescription(annotation1Type.getName());
- annotationTokenStreamBuilder = new AnnotationTokenStreamBuilder();
- }
-
- @Test
- public void testBuildCoveredTextAnnotationTokenStream() throws Exception {
- AnnotationTokenStream annotationTokenStream =
- annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
- assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
- assertEquals(0, annotationTokenStream.getFeatureNames().size());
- assertEquals(0, annotationTokenStream.getFeatureFormats().size());
- assertEquals("token1", annotationTokenStream.next(new Token()).term());
- }
-
- @Test
- public void testBuildFeatureAnnotationTokenStream() throws Exception {
- annotation1.setFeatureString("token1Feature1");
- annotationDescription.getFeatureDescriptions().add(new FeatureDescription("featureString"));
- AnnotationTokenStream annotationTokenStream =
- annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
- assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
- assertEquals(1, annotationTokenStream.getFeatureNames().size());
- assertEquals(0, annotationTokenStream.getFeatureFormats().size());
- assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
- }
-
- @Test
- public void testBuildFeatureAnnotationTokenStreamWithFormat() throws Exception {
- annotation1.setFeatureInteger(3);
-
- FeatureDescription featureDescription = new FeatureDescription("featureInteger");
- featureDescription.setNumberFormat("##");
-
- annotationDescription.getFeatureDescriptions().add(featureDescription);
- AnnotationTokenStream annotationTokenStream =
- annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
- assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
- assertEquals(1, annotationTokenStream.getFeatureNames().size());
- assertEquals(1, annotationTokenStream.getFeatureFormats().size());
- assertTrue(annotationTokenStream.getFeatureFormats().get("featureInteger") instanceof DecimalFormat);
- assertEquals("3", annotationTokenStream.next(new Token()).term());
- }
-
- @Test
- public void testBuildFeaturePathAnnotationTokenStream() throws Exception {
- FeatureStructure1 featureStructure1 = new FeatureStructure1(cas);
- featureStructure1.setFeature1("token1Feature1");
- FeatureStructure2 featureStructure2 = new FeatureStructure2(cas);
- featureStructure2.setFeature1("token1Feature3Feature1");
- featureStructure1.setFeature3(featureStructure2);
- annotation1.setFeatureStructure1(featureStructure1);
-
- annotationDescription.getFeatureDescriptions().add(new FeatureDescription("feature1"));
- annotationDescription.setFeaturePath("featureStructure1");
- AnnotationTokenStream annotationTokenStream =
- annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
- assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
- assertEquals(1, annotationTokenStream.getFeatureNames().size());
- assertEquals(0, annotationTokenStream.getFeatureFormats().size());
- assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
- }
+public class AnnotationTokenStreamBuilderTest {
+ private final static String READER_DESCRIPTOR = "src/test/resources/AnnotationTokenStreamTestDummyCollectionReader.xml";
+ private static final String DOCUMENT_TEXT = "token1 token2 token3";
+ private CollectionReader reader;
+ private JCas cas;
+ private AnnotationDescription annotationDescription;
+ private AnnotationTokenStreamBuilder annotationTokenStreamBuilder;
+ private Type annotation1Type;
+ private Annotation1 annotation1;
+
+ @Before
+ public void setUp() throws Exception {
+ CollectionReaderDescription readerDescription = (CollectionReaderDescription) UIMAFramework.getXMLParser().parseCollectionReaderDescription(new XMLInputSource(READER_DESCRIPTOR));
+ reader = UIMAFramework.produceCollectionReader(readerDescription);
+ cas = CasCreationUtils.createCas(reader.getProcessingResourceMetaData()).getJCas();
+ cas.setDocumentText(DOCUMENT_TEXT);
+ annotation1 = new Annotation1(cas);
+ annotation1.setBegin(0);
+ annotation1.setEnd(6);
+ annotation1.addToIndexes();
+
+ annotation1Type = annotation1.getType();
+ annotationDescription = new AnnotationDescription();
+ annotationDescription.setType(annotation1Type.getName());
+ annotationTokenStreamBuilder = new AnnotationTokenStreamBuilder();
+ }
+
+ @Test
+ public void testBuildCoveredTextAnnotationTokenStream() throws Exception{
+ AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+ assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+ assertEquals(0, annotationTokenStream.getFeatureNames().size());
+ assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+ assertEquals("token1", annotationTokenStream.next(new Token()).term());
+ }
+
+ @Test
+ public void testBuildFeatureAnnotationTokenStream() throws Exception{
+ annotation1.setFeatureString("token1Feature1");
+ annotationDescription.getFeatureDescriptions().add(new FeatureDescription("featureString"));
+ AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+ assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+ assertEquals(1, annotationTokenStream.getFeatureNames().size());
+ assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+ assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
+ }
+
+ @Test
+ public void testBuildFeatureAnnotationTokenStreamWithFormat() throws Exception{
+ annotation1.setFeatureInteger(3);
+
+ FeatureDescription featureDescription = new FeatureDescription("featureInteger");
+ featureDescription.setNumberFormat("##");
+
+ annotationDescription.getFeatureDescriptions().add(featureDescription);
+ AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+ assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+ assertEquals(1, annotationTokenStream.getFeatureNames().size());
+ assertEquals(1, annotationTokenStream.getFeatureFormats().size());
+ assertTrue(annotationTokenStream.getFeatureFormats().get("featureInteger") instanceof DecimalFormat);
+ assertEquals("3", annotationTokenStream.next(new Token()).term());
+ }
+
+ @Test
+ public void testBuildFeaturePathAnnotationTokenStream() throws Exception{
+ FeatureStructure1 featureStructure1 = new FeatureStructure1(cas);
+ featureStructure1.setFeature1("token1Feature1");
+ FeatureStructure2 featureStructure2 = new FeatureStructure2(cas);
+ featureStructure2.setFeature1("token1Feature3Feature1");
+ featureStructure1.setFeature3(featureStructure2);
+ annotation1.setFeatureStructure1(featureStructure1);
+
+ annotationDescription.getFeatureDescriptions().add(new FeatureDescription("feature1"));
+ annotationDescription.setFeaturePath("featureStructure1");
+ AnnotationTokenStream annotationTokenStream = annotationTokenStreamBuilder.createAnnotationTokenStream(cas, annotationDescription);
+ assertEquals(annotation1Type, annotationTokenStream.getAnnotationType());
+ assertEquals(1, annotationTokenStream.getFeatureNames().size());
+ assertEquals(0, annotationTokenStream.getFeatureFormats().size());
+ assertEquals("token1Feature1", annotationTokenStream.next(new Token()).term());
+ }
+
+
}
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DocumentBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -36,31 +36,28 @@
public class DocumentBuilderTest {
- private Field field1;
-
- private Field field2;
-
- private DocumentBuilder documentBuilder;
-
- private Collection<Field> fields;
-
- @Before
- public void setUp() {
- documentBuilder = new DocumentBuilder();
-
- field1 = new Field("field1", createMock(TokenStream.class));
- field2 = new Field("field2", createMock(TokenStream.class));
- fields = new ArrayList<Field>();
- fields.add(field1);
- fields.add(field2);
- }
-
- @Test
- public void testCreateDocument() throws CASException, IOException {
- Document document = documentBuilder.createDocument(fields);
-
- assertEquals(field1, document.getField("field1"));
- assertEquals(field2, document.getField("field2"));
- }
-
+ private Field field1;
+ private Field field2;
+ private DocumentBuilder documentBuilder;
+ private Collection<Field> fields;
+
+ @Before
+ public void setUp(){
+ documentBuilder = new DocumentBuilder();
+
+ field1 = new Field("field1", createMock(TokenStream.class));
+ field2 = new Field("field2", createMock(TokenStream.class));
+ fields = new ArrayList<Field>();
+ fields.add(field1);
+ fields.add(field2);
+ }
+
+ @Test
+ public void testCreateDocument() throws CASException, IOException{
+ Document document = documentBuilder.createDocument(fields);
+
+ assertEquals(field1, document.getField("field1"));
+ assertEquals(field2, document.getField("field2"));
+ }
+
}
Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/DummyTokenFilterFactory.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+
+public class DummyTokenFilterFactory implements TokenFilterFactory {
+
+ public TokenFilter createTokenFilter(TokenStream tokenStream, Properties properties) {
+
+ return new LowerCaseFilter(tokenStream);
+ }
+
+ public void preloadResources(Properties properties) throws IOException {
+ }
+
+}
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FieldBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,13 +19,18 @@
package org.apache.uima.lucas.indexer;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.isA;
import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -33,11 +38,10 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
-import org.apache.uima.lucas.indexer.FieldBuilder;
-import org.apache.uima.lucas.indexer.FieldDescription;
import org.apache.uima.lucas.indexer.analysis.TokenStreamConcatenator;
import org.apache.uima.lucas.indexer.analysis.TokenStreamMerger;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
+import org.apache.uima.lucas.indexer.mapping.FieldDescription;
import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
import org.junit.Before;
import org.junit.Test;
@@ -46,298 +50,351 @@
public class FieldBuilderTest {
- private FieldBuilder fieldBuilder;
-
- private FieldDescription fieldDescription;
-
- private AnnotationDescription annotationDescription1;
-
- private AnnotationDescription annotationDescription2;
-
- private JCas cas;
-
- private TokenStream tokenStream1;
-
- private TokenStream tokenStream2;
-
- private List<TokenStream> tokenStreams;
-
- @Before
- public void setUp() {
- annotationDescription1 = new AnnotationDescription("uima.cas.Annotation");
- annotationDescription2 = new AnnotationDescription("uima.cas.Annotation");
- fieldBuilder = new FieldBuilder();
- cas = createMock(JCas.class);
-
- Collection<Token> tokens1 = new ArrayList<Token>();
- tokens1.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
- tokens1.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
- tokens1.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
- Collection<Token> tokens2 = new ArrayList<Token>();
- tokens2.add(new Token("token4".toCharArray(), 0, 6, 0, 6));
- tokens2.add(new Token("token5".toCharArray(), 0, 6, 7, 13));
- tokens2.add(new Token("token6".toCharArray(), 0, 6, 14, 20));
-
- tokenStream1 = new CollectionTokenStream(tokens1);
- tokenStream2 = new CollectionTokenStream(tokens2);
-
- tokenStreams = Lists.newArrayList(tokenStream1, tokenStream2);
-
- fieldDescription = new FieldDescription("field1");
- fieldDescription.getAnnotationDescriptions().add(annotationDescription1);
- fieldDescription.getAnnotationDescriptions().add(annotationDescription2);
- }
-
- @Test
- public void testCreateFieldConcatenated() throws Exception {
-
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.tokenStreamValue() instanceof TokenStreamConcatenator);
-
- }
-
- @Test
- public void testCreateFieldMerged() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.tokenStreamValue() instanceof TokenStreamMerger);
- }
-
- @Test
- public void testCreateFieldNoIndex() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- assertEquals(0, fields.size());
- }
-
- @Test
- public void testCreateFieldNoNorms() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.getOmitNorms());
- assertTrue(field1.isIndexed());
- assertFalse(field1.isStored());
- }
-
- @Test
- public void testCreateFieldNoTF() throws Exception {
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_TF);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.getOmitTf());
- assertFalse(field1.getOmitNorms());
- assertTrue(field1.isIndexed());
- assertFalse(field1.isStored());
- }
-
- @Test
- public void testCreateFieldNoNormsTF() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS_TF);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.getOmitTf());
- assertTrue(field1.getOmitNorms());
- assertTrue(field1.isIndexed());
- assertFalse(field1.isStored());
- }
-
- @Test
- public void testCreateFieldTermVector() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_YES);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertFalse(field1.isStoreOffsetWithTermVector());
- assertTrue(field1.isTermVectorStored());
- assertFalse(field1.isStorePositionWithTermVector());
- }
-
- @Test
- public void testCreateFieldTermVectorOffset() throws Exception {
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_OFFSETS);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.isStoreOffsetWithTermVector());
- assertTrue(field1.isTermVectorStored());
- assertFalse(field1.isStorePositionWithTermVector());
- }
-
- @Test
- public void testCreateFieldTermVectorPositions() throws Exception {
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertFalse(field1.isStoreOffsetWithTermVector());
- assertTrue(field1.isTermVectorStored());
- assertTrue(field1.isStorePositionWithTermVector());
- }
-
- @Test
- public void testCreateFieldTermVectorOffsetPositions() throws Exception {
-
- fieldDescription.setMerge(true);
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertTrue(field1.isStoreOffsetWithTermVector());
- assertTrue(field1.isTermVectorStored());
- assertTrue(field1.isStorePositionWithTermVector());
- }
-
- @Test
- public void testCreateFieldIndexStored() throws Exception {
-
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- assertEquals(4, fields.size());
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertFalse(field1.isIndexed());
- assertTrue(field1.isStored());
- assertEquals("token1", field1.stringValue());
-
- Field field2 = fieldIterator.next();
- assertEquals("field1", field2.name());
- assertFalse(field2.isIndexed());
- assertTrue(field2.isStored());
- assertEquals("token2", field2.stringValue());
-
- Field field3 = fieldIterator.next();
- assertEquals("field1", field3.name());
- assertFalse(field3.isIndexed());
- assertTrue(field3.isStored());
- assertEquals("token3", field3.stringValue());
-
- Field field4 = fieldIterator.next();
- assertEquals("field1", field4.name());
- assertTrue(field4.isIndexed());
- assertFalse(field4.isStored());
- }
-
- @Test
- public void testCreateFieldIndexStoredDelimiter() throws Exception {
-
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
- fieldDescription.setDelimiter(" ");
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- assertEquals(2, fields.size());
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertFalse(field1.isIndexed());
- assertTrue(field1.isStored());
- assertEquals("token1 token2 token3", field1.stringValue());
-
- Field field2 = fieldIterator.next();
- assertEquals("field1", field2.name());
- assertTrue(field2.isIndexed());
- assertFalse(field2.isStored());
- }
-
- @Test
- public void testCreateFieldIndexStoredCompress() throws Exception {
- fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
- fieldDescription.setStored(FieldBuilder.FIELD_STORE_COMPRESS);
- tokenStreams.remove(1);
-
- Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
-
- assertEquals(4, fields.size());
- Iterator<Field> fieldIterator = fields.iterator();
- Field field1 = fieldIterator.next();
- assertEquals("field1", field1.name());
- assertFalse(field1.isIndexed());
- assertTrue(field1.isStored());
- assertTrue(field1.isCompressed());
- assertEquals("token1", field1.stringValue());
-
- Field field2 = fieldIterator.next();
- assertEquals("field1", field2.name());
- assertFalse(field2.isIndexed());
- assertTrue(field2.isStored());
- assertTrue(field2.isCompressed());
- assertEquals("token2", field2.stringValue());
-
- Field field3 = fieldIterator.next();
- assertEquals("field1", field3.name());
- assertFalse(field3.isIndexed());
- assertTrue(field3.isStored());
- assertTrue(field3.isCompressed());
- assertEquals("token3", field3.stringValue());
-
- Field field4 = fieldIterator.next();
- assertEquals("field1", field4.name());
- assertTrue(field4.isIndexed());
- assertFalse(field4.isStored());
- }
+ private FieldBuilder fieldBuilder;
+ private FieldDescription fieldDescription;
+ private AnnotationDescription annotationDescription1;
+ private AnnotationDescription annotationDescription2;
+ private JCas cas;
+
+ private TokenStream tokenStream1;
+ private TokenStream tokenStream2;
+ private List<TokenStream> tokenStreams;
+ private FilterBuilder filterBuilder;
+
+ @Before
+ public void setUp(){
+ annotationDescription1= new AnnotationDescription("uima.cas.Annotation");
+ annotationDescription2= new AnnotationDescription("uima.cas.Annotation");
+ filterBuilder = createMock(FilterBuilder.class);
+ fieldBuilder = new FieldBuilder(filterBuilder);
+ cas = createMock(JCas.class);
+
+ Collection<Token> tokens1 = new ArrayList<Token>();
+ tokens1.add(new Token("token1".toCharArray(),0,6,0,6));
+ tokens1.add(new Token("token2".toCharArray(),0,6,7,13));
+ tokens1.add(new Token("token3".toCharArray(),0,6,14,20));
+
+ Collection<Token> tokens2 = new ArrayList<Token>();
+ tokens2.add(new Token("token4".toCharArray(),0,6,0,6));
+ tokens2.add(new Token("token5".toCharArray(),0,6,7,13));
+ tokens2.add(new Token("token6".toCharArray(),0,6,14,20));
+
+ tokenStream1 = new CollectionTokenStream(tokens1);
+ tokenStream2 = new CollectionTokenStream(tokens2);
+
+ tokenStreams = Lists.newArrayList(tokenStream1, tokenStream2);
+
+ fieldDescription = new FieldDescription("field1");
+ fieldDescription.getAnnotationDescriptions().add(annotationDescription1);
+ fieldDescription.getAnnotationDescriptions().add(annotationDescription2);
+ fieldDescription.setFilterDescriptions(Collections.EMPTY_LIST);
+ }
+
+ @Test
+ public void testCreateFieldConcatenated() throws Exception{
+
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamConcatenator.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertEquals(tokenStream, field1.tokenStreamValue());
+
+ }
+
+ @Test
+ public void testCreateFieldMerged() throws Exception{
+
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertEquals(tokenStream, field1.tokenStreamValue());
+ }
+
+ @Test
+ public void testCreateFieldNoIndex() throws Exception{
+
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+
+ assertEquals(0, fields.size());
+ }
+
+ @Test
+ public void testCreateFieldNoNorms() throws Exception{
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+
+ verify(filterBuilder);
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertTrue(field1.getOmitNorms());
+ assertTrue(field1.isIndexed());
+ assertFalse(field1.isStored());
+ }
+
+ @Test
+ public void testCreateFieldNoTF() throws Exception{
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_TF);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertTrue(field1.getOmitTf());
+ assertFalse(field1.getOmitNorms());
+ assertTrue(field1.isIndexed());
+ assertFalse(field1.isStored());
+ }
+
+ @Test
+ public void testCreateFieldNoNormsTF() throws Exception{
+
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS_TF);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertTrue(field1.getOmitTf());
+ assertTrue(field1.getOmitNorms());
+ assertTrue(field1.isIndexed());
+ assertFalse(field1.isStored());
+ }
+
+ @Test
+ public void testCreateFieldTermVector() throws Exception{
+
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_YES);
+ tokenStreams.remove(1);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertFalse(field1.isStoreOffsetWithTermVector());
+ assertTrue(field1.isTermVectorStored());
+ assertFalse(field1.isStorePositionWithTermVector());
+ }
+
+ @Test
+ public void testCreateFieldTermVectorOffset() throws Exception{
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_OFFSETS);
+ tokenStreams.remove(1);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertTrue(field1.isStoreOffsetWithTermVector());
+ assertTrue(field1.isTermVectorStored());
+ assertFalse(field1.isStorePositionWithTermVector());
+ }
+
+ @Test
+ public void testCreateFieldTermVectorPositions() throws Exception{
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS);
+ tokenStreams.remove(1);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertFalse(field1.isStoreOffsetWithTermVector());
+ assertTrue(field1.isTermVectorStored());
+ assertTrue(field1.isStorePositionWithTermVector());
+ }
+
+ @Test
+ public void testCreateFieldTermVectorOffsetPositions() throws Exception{
+
+ fieldDescription.setMerge(true);
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
+ tokenStreams.remove(1);
+
+ TokenStream tokenStream = createMock(TokenStream.class);
+ expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertTrue(field1.isStoreOffsetWithTermVector());
+ assertTrue(field1.isTermVectorStored());
+ assertTrue(field1.isStorePositionWithTermVector());
+ }
+
+
+ @Test
+ public void testCreateFieldIndexStored() throws Exception{
+
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
+ tokenStreams.remove(1);
+
+ expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ assertEquals(4, fields.size());
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertFalse(field1.isIndexed());
+ assertTrue(field1.isStored());
+ assertEquals("token1", field1.stringValue());
+
+ Field field2 = fieldIterator.next();
+ assertEquals("field1", field2.name());
+ assertFalse(field2.isIndexed());
+ assertTrue(field2.isStored());
+ assertEquals("token2", field2.stringValue());
+
+ Field field3 = fieldIterator.next();
+ assertEquals("field1", field3.name());
+ assertFalse(field3.isIndexed());
+ assertTrue(field3.isStored());
+ assertEquals("token3", field3.stringValue());
+
+ Field field4 = fieldIterator.next();
+ assertEquals("field1", field4.name());
+ assertTrue(field4.isIndexed());
+ assertFalse(field4.isStored());
+ }
+
+ @Test
+ public void testCreateFieldIndexStoredDelimiter() throws Exception{
+
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES);
+ fieldDescription.setDelimiter(" ");
+ tokenStreams.remove(1);
+
+ expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ assertEquals(2, fields.size());
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertFalse(field1.isIndexed());
+ assertTrue(field1.isStored());
+ assertEquals("token1 token2 token3", field1.stringValue());
+
+ Field field2 = fieldIterator.next();
+ assertEquals("field1", field2.name());
+ assertTrue(field2.isIndexed());
+ assertFalse(field2.isStored());
+ }
+
+ @Test
+ public void testCreateFieldIndexStoredCompress() throws Exception{
+ fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
+ fieldDescription.setStored(FieldBuilder.FIELD_STORE_COMPRESS);
+ tokenStreams.remove(1);
+
+ expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1);
+ replay(filterBuilder);
+
+ Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
+ verify(filterBuilder);
+
+ assertEquals(4, fields.size());
+ Iterator<Field> fieldIterator = fields.iterator();
+ Field field1 = fieldIterator.next();
+ assertEquals("field1", field1.name());
+ assertFalse(field1.isIndexed());
+ assertTrue(field1.isStored());
+ assertTrue(field1.isCompressed());
+ assertEquals("token1", field1.stringValue());
+
+ Field field2 = fieldIterator.next();
+ assertEquals("field1", field2.name());
+ assertFalse(field2.isIndexed());
+ assertTrue(field2.isStored());
+ assertTrue(field2.isCompressed());
+ assertEquals("token2", field2.stringValue());
+
+ Field field3 = fieldIterator.next();
+ assertEquals("field1", field3.name());
+ assertFalse(field3.isIndexed());
+ assertTrue(field3.isStored());
+ assertTrue(field3.isCompressed());
+ assertEquals("token3", field3.stringValue());
+
+ Field field4 = fieldIterator.next();
+ assertEquals("field1", field4.name());
+ assertTrue(field4.isIndexed());
+ assertFalse(field4.isStored());
+ }
}
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/FilterBuilderTest.java Fri Jul 24 08:58:52 2009
@@ -19,163 +19,102 @@
package org.apache.uima.lucas.indexer;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
-import org.apache.uima.lucas.indexer.FilterBuilder;
-import org.apache.uima.lucas.indexer.analysis.AdditionTokenFilter;
-import org.apache.uima.lucas.indexer.analysis.HypernymTokenFilter;
-import org.apache.uima.lucas.indexer.analysis.PositionFilter;
-import org.apache.uima.lucas.indexer.analysis.ReplaceFilter;
-import org.apache.uima.lucas.indexer.analysis.SplitterFilter;
-import org.apache.uima.lucas.indexer.analysis.UniqueFilter;
-import org.apache.uima.lucas.indexer.analysis.UpperCaseTokenFilter;
+import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
+import org.apache.uima.lucas.indexer.mapping.FilterDescription;
import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
import org.junit.Before;
import org.junit.Test;
-import com.google.common.collect.Lists;
public class FilterBuilderTest {
- private static final String PORTER = "Porter";
-
- private static final String SUFFIX = "suffix";
-
- private static final String PREFIX = "prefix";
-
- private static final String MAP_FILE_NAME = "mapfile.txt";
-
+ private static final String LOWER_CASE_FACTORY_ID = "lowerCaseFactory";
private FilterBuilder filterBuilder;
-
- private AnnotationDescription annotationDescription;
-
- private String[] stopwords;
-
- private Map<String, Map<String, String>> tokenMappings;
-
- private Map<String, List<String>> hypernyms;
-
- private TokenStream tokenStream;
-
- private Map<String, String> tokenMapping;
-
- @Before
- public void setUp() {
- annotationDescription = new AnnotationDescription(null);
- Collection<Token> tokens = new ArrayList<Token>();
- tokens.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
- tokens.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
- tokens.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
- tokenMappings = new HashMap<String, Map<String, String>>();
- tokenMapping = new HashMap<String, String>();
- tokenMappings.put(MAP_FILE_NAME, tokenMapping);
-
- tokenStream = new CollectionTokenStream(tokens);
- stopwords = new String[] { "na", "und", "nu" };
- hypernyms = new HashMap<String, List<String>>();
- hypernyms.put("token1", Lists.newArrayList("token111", "token11", "token1"));
-
- filterBuilder = new FilterBuilder(stopwords, hypernyms, tokenMappings);
- }
-
- @Test
- public void testFilterMapping() throws Exception {
- annotationDescription.setMappingFile(MAP_FILE_NAME);
- ReplaceFilter replaceFilter =
- (ReplaceFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(tokenMapping, replaceFilter.getMapping());
- }
-
- @Test
- public void testFilterPosition() throws Exception {
- annotationDescription.setPosition(FilterBuilder.POSITION_FIRST);
- PositionFilter positionFilter =
- (PositionFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(PositionFilter.FIRST_POSITION, positionFilter.getPosition());
-
- annotationDescription.setPosition(FilterBuilder.POSITION_LAST);
- positionFilter = (PositionFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(PositionFilter.LAST_POSITION, positionFilter.getPosition());
- }
-
- @Test
- public void testFilterAddition() throws Exception {
- annotationDescription.setPrefix(PREFIX);
- AdditionTokenFilter additionFilter =
- (AdditionTokenFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(AdditionTokenFilter.PREFIX, additionFilter.getPosition());
- assertEquals(PREFIX, additionFilter.getAddition());
-
- annotationDescription.setPostfix(SUFFIX);
- additionFilter = (AdditionTokenFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(PositionFilter.LAST_POSITION, additionFilter.getPosition());
- assertEquals(SUFFIX, additionFilter.getAddition());
+ private TokenStream tokenStream;
+
+
+ @Before
+ public void setUp(){
+ Collection<Token> tokens = new ArrayList<Token>();
+ tokens.add(new Token("token1".toCharArray(),0,6,0,6));
+ tokens.add(new Token("token2".toCharArray(),0,6,7,13));
+ tokens.add(new Token("token3".toCharArray(),0,6,14,20));
+
+ tokenStream = new CollectionTokenStream(tokens);
+
+ filterBuilder = new FilterBuilder(new HashMap<String, TokenFilterFactory>());
+ }
+
+ @Test
+ public void testFilterWithoutFactory() throws Exception{
+ Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+ filterDescriptions.add(new FilterDescription(LowerCaseFilter.class.getCanonicalName(), null, null, false, null));
+
+ TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+ assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+ }
+
+ @Test
+ public void testFilterWithFactory() throws Exception{
+ Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+ filterDescriptions.add(new FilterDescription(null, DummyTokenFilterFactory.class.getCanonicalName(), null, false, null));
+
+ TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+ assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+ }
+
+ @Test
+ public void testFilterWithNamedAndCachedFactory() throws Exception{
+ Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+ filterDescriptions.add(new FilterDescription(null, DummyTokenFilterFactory.class.getCanonicalName(), LOWER_CASE_FACTORY_ID, true, null));
+
+ TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, filterDescriptions);
+ assertEquals(LowerCaseFilter.class, filteredTokenStream.getClass());
+ Map<String, TokenFilterFactory> cachedFactories = filterBuilder.getCachedFactories();
+ assertEquals(LOWER_CASE_FACTORY_ID, cachedFactories.keySet().iterator().next());
+ assertTrue(cachedFactories.get(LOWER_CASE_FACTORY_ID) instanceof DummyTokenFilterFactory);
+
+ // test caching
+ filterDescriptions = new ArrayList<FilterDescription>();
+ filterDescriptions.add(new FilterDescription(null, null, LOWER_CASE_FACTORY_ID, true, null));
+ TokenFilterFactory factory = createMock(TokenFilterFactory.class);
+ expect(factory.createTokenFilter(tokenStream, null)).andReturn(null);
+ replay(factory);
+
+ cachedFactories.put(LOWER_CASE_FACTORY_ID, factory);
+ filterBuilder.filter(tokenStream, filterDescriptions);
+ verify(factory);
+ }
+
+ @Test
+ public void testFilterWithPredefinedFactory() throws Exception{
+ Map<String, TokenFilterFactory> predifinedFactories = new HashMap<String, TokenFilterFactory>();
+
+ // test caching
+ Collection<FilterDescription> filterDescriptions = new ArrayList<FilterDescription>();
+ filterDescriptions.add(new FilterDescription(null, null, LOWER_CASE_FACTORY_ID, true, null));
+ TokenFilterFactory factory = createMock(TokenFilterFactory.class);
+ expect(factory.createTokenFilter(tokenStream, null)).andReturn(null);
+ replay(factory);
+
+ predifinedFactories.put(LOWER_CASE_FACTORY_ID, factory);
+ filterBuilder = new FilterBuilder(predifinedFactories);
+ filterBuilder.filter(tokenStream, filterDescriptions);
+ verify(factory);
}
-
- @Test
- public void testFilterSplit() throws Exception {
- annotationDescription.setSplitString(" ");
- SplitterFilter splitterFilter =
- (SplitterFilter) filterBuilder.filter(tokenStream, annotationDescription);
- assertEquals(" ", splitterFilter.getSplitString());
- }
-
- @Test
- public void testFilterLowercase() throws Exception {
- annotationDescription.setLowercase(true);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof LowerCaseFilter);
- }
-
- @Test
- public void testFilterUpperCase() throws Exception {
- annotationDescription.setUppercase(true);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof UpperCaseTokenFilter);
- }
-
- @Test
- public void testFilterStopFilter() throws Exception {
- annotationDescription.setStopwordRemove(true);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof StopFilter);
- }
-
- @Test
- public void testFilterHypernyms() throws Exception {
- annotationDescription.setAddHypernyms(true);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof HypernymTokenFilter);
- HypernymTokenFilter hypernymTokenFilter = (HypernymTokenFilter) filteredTokenStream;
- assertEquals(hypernyms, hypernymTokenFilter.getHypernyms());
- }
-
- @Test
- public void testFilterPorter() throws Exception {
- annotationDescription.setSnowballFilter(PORTER);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof SnowballFilter);
- }
-
- @Test
- public void testFilterUnique() throws Exception {
- annotationDescription.setUnique(true);
- TokenStream filteredTokenStream = filterBuilder.filter(tokenStream, annotationDescription);
- assertTrue(filteredTokenStream instanceof UniqueFilter);
- }
-
}
Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/IndexWriterProviderImplTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer;
+
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.management.ManagementFactory;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class IndexWriterProviderImplTest {
+
+ private static final String TEST_INDEX = "src/test/resources/test-index";
+ private static final String RESOURCES_TEST_INDEX_PROPERTIES = "src/test/resources/IndexWriter.properties";
+ private IndexWriterProviderImpl indexWriterProviderImpl;
+ private DataResource dataResource;
+ private InputStream propertiesInputStream;
+
+ @Before
+ public void setUp() throws IOException{
+ indexWriterProviderImpl = new IndexWriterProviderImpl();
+ dataResource = createMock(DataResource.class);
+ FileInputStream fileInputStream = new FileInputStream(RESOURCES_TEST_INDEX_PROPERTIES);
+ propertiesInputStream = new BufferedInputStream(fileInputStream);
+ }
+
+ @After
+ public void tearDown() throws Exception{
+
+ FSDirectory directory = (FSDirectory) indexWriterProviderImpl.getIndexWriter().getDirectory();
+ File directoryFile = directory.getFile();
+
+ directory = FSDirectory.getDirectory(directoryFile);
+ IndexWriter.unlock(directory);
+
+ for( String file: directory.list() )
+ directory.deleteFile(file);
+
+ directory.getFile().delete();
+ }
+
+ @Test
+ public void testLoadData() throws IOException, ResourceInitializationException{
+
+ expect(dataResource.getInputStream()).andReturn(propertiesInputStream);
+ replay(dataResource);
+
+ indexWriterProviderImpl.load(dataResource);
+ IndexWriter indexWriter = indexWriterProviderImpl.getIndexWriter();
+ FSDirectory fsDirectory = (FSDirectory) indexWriter.getDirectory();
+
+ String hostname = getHostName();
+ String pid = getPID();
+
+ assertTrue(fsDirectory.getFile().getAbsolutePath().endsWith(TEST_INDEX +"-"+hostname+"-"+pid));
+ assertEquals(513, indexWriter.getRAMBufferSizeMB(), 0.5);
+ assertEquals(9999, indexWriter.getMaxFieldLength(), 0.5);
+ }
+
+ protected String getPID(){
+ String id = ManagementFactory.getRuntimeMXBean().getName();
+ return id.substring(0, id.indexOf("@") );
+ }
+
+ public String getHostName(){
+ InetAddress address;
+ String hostName;
+ try {
+ address = InetAddress.getLocalHost();
+ hostName = address.getHostName();
+ } catch (UnknownHostException e) {
+ throw new IllegalStateException(e);
+ }
+ return hostName;
+ }
+}
Modified: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java?rev=797373&r1=797372&r2=797373&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java (original)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/TokenizerTest.java Fri Jul 24 08:58:52 2009
@@ -28,47 +28,46 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.uima.lucas.indexer.AnnotationDescription;
import org.apache.uima.lucas.indexer.Tokenizer;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
import org.junit.Before;
import org.junit.Test;
-public class TokenizerTest {
- private Tokenizer tokenizer;
-
- private AnnotationDescription annotationDescription;
-
- private TokenStream tokenStream;
-
- @Before
- public void setUp() {
- tokenizer = new Tokenizer();
- annotationDescription = new AnnotationDescription(null);
- Collection<Token> tokens = new ArrayList<Token>();
- tokens.add(new Token("token1".toCharArray(), 0, 6, 0, 6));
- tokens.add(new Token("token2".toCharArray(), 0, 6, 7, 13));
- tokens.add(new Token("token3".toCharArray(), 0, 6, 14, 20));
-
- tokenStream = new CollectionTokenStream(tokens);
- }
- @Test
- public void testTokenizeWhiteSpace() throws Exception {
- annotationDescription.setTokenizer(Tokenizer.TOKENIZER_WHITESPACE);
- assertTrue(tokenizer.needsTokenization(annotationDescription));
-
- TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
- assertTrue(reTokenizedTokenStream instanceof WhitespaceTokenizer);
- }
-
- @Test
- public void testTokenizeStandard() throws Exception {
- annotationDescription.setTokenizer(Tokenizer.TOKENIZER_STANDARD);
- assertTrue(tokenizer.needsTokenization(annotationDescription));
-
- TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
- assertTrue(reTokenizedTokenStream instanceof StandardTokenizer);
- }
+public class TokenizerTest {
+ private Tokenizer tokenizer;
+ private AnnotationDescription annotationDescription;
+ private TokenStream tokenStream;
+
+ @Before
+ public void setUp(){
+ tokenizer = new Tokenizer();
+ annotationDescription = new AnnotationDescription(null);
+ Collection<Token> tokens = new ArrayList<Token>();
+ tokens.add(new Token("token1".toCharArray(),0,6,0,6));
+ tokens.add(new Token("token2".toCharArray(),0,6,7,13));
+ tokens.add(new Token("token3".toCharArray(),0,6,14,20));
+
+ tokenStream = new CollectionTokenStream(tokens);
+ }
+
+ @Test
+ public void testTokenizeWhiteSpace() throws Exception{
+ annotationDescription.setTokenizer(Tokenizer.TOKENIZER_WHITESPACE);
+ assertTrue(tokenizer.needsTokenization(annotationDescription));
+
+ TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
+ assertTrue(reTokenizedTokenStream instanceof WhitespaceTokenizer );
+ }
+
+ @Test
+ public void testTokenizeStandard() throws Exception{
+ annotationDescription.setTokenizer(Tokenizer.TOKENIZER_STANDARD);
+ assertTrue(tokenizer.needsTokenization(annotationDescription));
+
+ TokenStream reTokenizedTokenStream = tokenizer.tokenize(tokenStream, annotationDescription);
+ assertTrue(reTokenizedTokenStream instanceof StandardTokenizer );
+ }
}
Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterFactoryTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.util.Properties;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.easymock.classextension.EasyMock.*;
+import static org.junit.Assert.*;
+
+public class AdditionFilterFactoryTest {
+
+ private static final String TEST_STRING = "test";
+ private AdditionFilterFactory additionFilterFactory;
+ private TokenStream tokenStream;
+
+ @Before
+ public void setUp(){
+ tokenStream = createMock(TokenStream.class);
+ additionFilterFactory = new AdditionFilterFactory();
+ }
+
+ @Test
+ public void testCreateTokenFilter() throws Exception{
+ Properties properties = new Properties();
+ properties.setProperty(AdditionFilterFactory.POSTFIX_POSITION, TEST_STRING);
+
+ AdditionFilter additionFilter = (AdditionFilter) additionFilterFactory.createTokenFilter(tokenStream, properties);
+
+ assertEquals(AdditionFilter.POSTFIX, additionFilter.getPosition());
+ assertEquals(TEST_STRING, additionFilter.getAddition());
+ }
+}
Added: incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java?rev=797373&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java (added)
+++ incubator/uima/sandbox/trunk/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/AdditionFilterTest.java Fri Jul 24 08:58:52 2009
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.AdditionFilter;
+import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class AdditionFilterTest {
+
+ @Test
+ public void testNext() throws Exception{
+ Collection<Token> tokens = new ArrayList<Token>();
+ tokens.add(new Token("token1", 0, 6));
+ tokens.add(new Token("token2", 7, 13));
+ tokens.add(new Token("token3", 14, 20));
+ tokens.add(new Token("token4", 21, 27));
+
+ TokenStream tokenStream = new CollectionTokenStream(tokens);
+ AdditionFilter filter = new AdditionFilter(tokenStream, "prefix_", AdditionFilter.PREFIX);
+
+ Token next = filter.next();
+ assertEquals("prefix_token1", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("prefix_token2", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("prefix_token3", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("prefix_token4", new String(next.termBuffer(), 0, next.termLength()));
+
+ tokens = new ArrayList<Token>();
+ tokens.add(new Token("token1", 0, 6));
+ tokens.add(new Token("token2", 7, 13));
+ tokens.add(new Token("token3", 14, 20));
+ tokens.add(new Token("token4", 21, 27));
+
+ tokenStream = new CollectionTokenStream(tokens);
+ filter = new AdditionFilter(tokenStream, "_postfix", AdditionFilter.POSTFIX);
+
+ next = filter.next();
+ assertEquals("token1_postfix", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("token2_postfix", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("token3_postfix", new String(next.termBuffer(), 0, next.termLength()));
+ next = filter.next();
+ assertEquals("token4_postfix", new String(next.termBuffer(), 0, next.termLength()));
+ }
+}