You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2012/02/15 14:17:58 UTC
svn commit: r1244474 - in /lucene/dev/trunk/modules/analysis/uima/src:
test-files/uima/ test/org/apache/lucene/analysis/uima/
test/org/apache/lucene/analysis/uima/ae/
test/org/apache/lucene/analysis/uima/an/
Author: tommaso
Date: Wed Feb 15 13:17:57 2012
New Revision: 1244474
URL: http://svn.apache.org/viewvc?rev=1244474&view=rev
Log:
[LUCENE-3731] - creating and using simple wst and pos tagger implementations for analyzers' random string testing
Added:
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
Removed:
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/AggregateDummySentenceAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/DummyEntityAE.xml
lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/DummyPoSTagger.xml
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyEntityAnnotator.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyPoSTagger.java
Modified:
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive>
+ <delegateAnalysisEngineSpecifiers>
+ <delegateAnalysisEngine key="WhitespaceTokenizer">
+ <import location="TestWSTokenizerAE.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="PoSTagger">
+ <import location="TestPoSTaggerAE.xml"/>
+ </delegateAnalysisEngine>
+ </delegateAnalysisEngineSpecifiers>
+ <analysisEngineMetaData>
+ <name>TestAggregateSentenceAE</name>
+ <description/>
+ <version>1.0</version>
+ <vendor>ASF</vendor>
+ <flowConstraints>
+ <fixedFlow>
+ <node>WhitespaceTokenizer</node>
+ <node>PoSTagger</node>
+ </fixedFlow>
+ </flowConstraints>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+ </outputs>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+</analysisEngineDescription>
Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>TestEntityAnnotatorAE</name>
+ <description/>
+ <version>1.0</version>
+ <vendor>ASF</vendor>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>org.apache.lucene.uima.ts.EntityAnnotation</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>name</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>entity</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ </types>
+ </typeSystemDescription>
+ <capabilities>
+ <capability>
+ <inputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+ </inputs>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.EntityAnnotation</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SamplePoSTagger</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>TestPoSTaggerAE</name>
+ <description/>
+ <version>1.0</version>
+ <vendor>ASF</vendor>
+ <capabilities>
+ <capability>
+ <inputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+ </inputs>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+</analysisEngineDescription>
Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>TestWSTokenizerAE</name>
+ <version>1.0</version>
+ <vendor>ASF</vendor>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>org.apache.lucene.uima.ts.TokenAnnotation</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>pos</name>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.lucene.uima.ts.SentenceAnnotation</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+ </types>
+ </typeSystemDescription>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
+ </outputs>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+</analysisEngineDescription>
Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Wed Feb 15 13:17:57 2012
@@ -118,7 +118,7 @@ public class UIMABaseAnalyzerTest extend
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random, new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"),
+ checkRandomData(random, new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
1000 * RANDOM_MULTIPLIER);
}
Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java Wed Feb 15 13:17:57 2012
@@ -60,8 +60,8 @@ public class UIMATypeAwareAnalyzerTest e
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/AggregateDummySentenceAE.xml",
- "org.apache.uima.TokenAnnotation", "tokenType"), 1000 * RANDOM_MULTIPLIER);
+ checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
+ "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 1000 * RANDOM_MULTIPLIER);
}
}
Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java Wed Feb 15 13:17:57 2012
@@ -28,8 +28,8 @@ import static org.junit.Assert.assertNot
public class BasicAEProviderTest {
@Test
- public void testBasicInititalization() throws Exception {
- AEProvider basicAEProvider = new BasicAEProvider("/uima/DummyEntityAE.xml");
+ public void testBasicInitialization() throws Exception {
+ AEProvider basicAEProvider = new BasicAEProvider("/uima/TestEntityAnnotatorAE.xml");
AnalysisEngine analysisEngine = basicAEProvider.getAE();
assertNotNull(analysisEngine);
}
Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java Wed Feb 15 13:17:57 2012
@@ -34,7 +34,7 @@ public class OverridingParamsAEProviderT
@Test
public void testNullMapInitialization() throws Exception {
try {
- AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", null);
+ AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", null);
aeProvider.getAE();
fail("should fail due to null Map passed");
} catch (ResourceInitializationException e) {
@@ -44,7 +44,7 @@ public class OverridingParamsAEProviderT
@Test
public void testEmptyMapInitialization() throws Exception {
- AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", new HashMap<String, Object>());
+ AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", new HashMap<String, Object>());
AnalysisEngine analysisEngine = aeProvider.getAE();
assertNotNull(analysisEngine);
}
Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,64 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.TokenAnnotation;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+/**
+ * Dummy implementation of an entity annotator to tag tokens as certain types of entities:
+ * every token whose PoS tag is "np" or "nps" yields an EntityAnnotation over the same span
+ */
+public class SampleEntityAnnotator extends JCasAnnotator_ImplBase {
+
+  private static final String NP = "np";
+  private static final String NPS = "nps";
+  // type and feature names must match the declarations in TestEntityAnnotatorAE.xml
+  private static final String TYPE_NAME = "org.apache.lucene.uima.ts.EntityAnnotation";
+  private static final String ENTITY_FEATURE = "entity";
+  private static final String NAME_FEATURE = "name";
+
+  /** Stores the covered text in "entity" and a coarse label in "name" for each matching token. */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
+    Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
+    Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);
+
+    // NOTE(review): descriptor input is org.apache.lucene.uima.ts.TokenAnnotation, not this cover class - confirm
+    for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
+      String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
+      if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
+        String coveredText = annotation.getCoveredText();
+        AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
+        entityAnnotation.setStringValue(entityFeature, coveredText);
+        // "OTHER" is only a placeholder; in practice this would be "PERSON", "COUNTRY", "E-MAIL", etc.
+        String name = "Apache".equals(coveredText) ? "ORGANIZATION" : "OTHER";
+        entityAnnotation.setStringValue(nameFeature, name);
+        jcas.addFsToIndexes(entityAnnotation);
+      }
+    }
+  }
+
+}
Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+/**
+ * Sample PoS tagger for tests: stores "NUM" or "WORD" in the "pos" feature of every token annotation
+ */
+public class SamplePoSTagger extends JCasAnnotator_ImplBase {
+
+  private static final String NUM = "NUM";
+  private static final String WORD = "WORD";
+  private static final String TYPE_NAME = "org.apache.lucene.uima.ts.TokenAnnotation";
+  private static final String FEATURE_NAME = "pos";
+
+  /** Writes the computed tag into the "pos" feature of each indexed token annotation. */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Type tokenType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
+    Feature pos = tokenType.getFeatureByBaseName(FEATURE_NAME);
+    for (Annotation token : jcas.getAnnotationIndex(tokenType)) {
+      token.setStringValue(pos, extractPoS(token.getCoveredText()));
+    }
+  }
+
+  /** Returns NUM when {@code text} is accepted by {@link Double#valueOf(String)}, WORD otherwise. */
+  private String extractPoS(String text) {
+    String tag = NUM;
+    try {
+      Double.valueOf(text);
+    } catch (Exception ignored) { // any failure to parse the text means "not a number"
+      tag = WORD;
+    }
+    return tag;
+  }
+}
Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Dummy implementation of a UIMA based whitespace tokenizer: one SentenceAnnotation per line
+ * and one TokenAnnotation per space-separated chunk of the document text
+ */
+public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase {
+
+  private static final String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
+  private static final String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
+  private static final String CR = "\n";
+  private static final String WHITESPACE = " ";
+
+  /** Indexes the sentence and token annotations, with offsets relative to the document text. */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
+    Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
+    int i = 0;
+    for (String sentenceString : jCas.getDocumentText().split(CR)) {
+      // createAnnotation takes an absolute end offset, not a length
+      AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, i + sentenceString.length());
+      jCas.addFsToIndexes(sentenceAnnotation);
+      i += sentenceString.length() + CR.length(); // also skip the separator removed by split()
+    }
+    int j = 0;
+    for (String tokenString : jCas.getDocumentText().split(WHITESPACE)) {
+      int tokenLength = tokenString.length();
+      AnnotationFS tokenAnnotation = jCas.getCas().createAnnotation(tokenType, j, j + tokenLength);
+      jCas.addFsToIndexes(tokenAnnotation);
+      j += tokenLength + WHITESPACE.length(); // also skip the separator removed by split()
+    }
+  }
+
+}