You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2012/02/15 14:17:58 UTC

svn commit: r1244474 - in /lucene/dev/trunk/modules/analysis/uima/src: test-files/uima/ test/org/apache/lucene/analysis/uima/ test/org/apache/lucene/analysis/uima/ae/ test/org/apache/lucene/analysis/uima/an/

Author: tommaso
Date: Wed Feb 15 13:17:57 2012
New Revision: 1244474

URL: http://svn.apache.org/viewvc?rev=1244474&view=rev
Log:
[LUCENE-3731] - creating and using simple wst and pos tagger implementations for analyzers' random string testing

Added:
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
Removed:
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/AggregateDummySentenceAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/DummyEntityAE.xml
    lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/DummyPoSTagger.xml
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyEntityAnnotator.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyPoSTagger.java
Modified:
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
    lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java

Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>false</primitive>
+  <delegateAnalysisEngineSpecifiers>
+    <delegateAnalysisEngine key="WhitespaceTokenizer">
+      <import location="TestWSTokenizerAE.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="PoSTagger">
+      <import location="TestPoSTaggerAE.xml"/>
+    </delegateAnalysisEngine>
+  </delegateAnalysisEngineSpecifiers>
+  <analysisEngineMetaData>
+    <name>TestAggregateSentenceAE</name>
+    <description/>
+    <version>1.0</version>
+    <vendor>ASF</vendor>
+    <flowConstraints>
+      <fixedFlow>
+        <node>WhitespaceTokenizer</node>
+        <node>PoSTagger</node>
+      </fixedFlow>
+    </flowConstraints>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+        </outputs>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>

Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>TestEntityAnnotator</name>
+    <description/>
+    <version>1.0</version>
+    <vendor>ASF</vendor>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>org.apache.lucene.uima.ts.EntityAnnotation</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>name</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+            <featureDescription>
+              <name>entity</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+      </types>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+        </inputs>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.EntityAnnotation</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SamplePoSTagger</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DummyPoSTagger</name>
+    <description/>
+    <version>1.0</version>
+    <vendor>ASF</vendor>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+        </inputs>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>

Added: lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml Wed Feb 15 13:17:57 2012
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>TestWSTokenizer</name>
+    <version>1.0</version>
+    <vendor>ASF</vendor>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>org.apache.lucene.uima.ts.TokenAnnotation</name>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>pos</name>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.lucene.uima.ts.SentenceAnnotation</name>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+        </typeDescription>
+      </types>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
+        </outputs>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>

Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Wed Feb 15 13:17:57 2012
@@ -118,7 +118,7 @@ public class UIMABaseAnalyzerTest extend
 
   @Test
   public void testRandomStrings() throws Exception {
-    checkRandomData(random, new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"),
+    checkRandomData(random, new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
         1000 * RANDOM_MULTIPLIER);
   }
 

Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java Wed Feb 15 13:17:57 2012
@@ -60,8 +60,8 @@ public class UIMATypeAwareAnalyzerTest e
 
   @Test
   public void testRandomStrings() throws Exception {
-    checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/AggregateDummySentenceAE.xml",
-        "org.apache.uima.TokenAnnotation", "tokenType"), 1000 * RANDOM_MULTIPLIER);
+    checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
+        "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 1000 * RANDOM_MULTIPLIER);
   }
 
 }

Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java Wed Feb 15 13:17:57 2012
@@ -28,8 +28,8 @@ import static org.junit.Assert.assertNot
 public class BasicAEProviderTest {
 
   @Test
-  public void testBasicInititalization() throws Exception {
-    AEProvider basicAEProvider = new BasicAEProvider("/uima/DummyEntityAE.xml");
+  public void testBasicInitialization() throws Exception {
+    AEProvider basicAEProvider = new BasicAEProvider("/uima/TestEntityAnnotatorAE.xml");
     AnalysisEngine analysisEngine = basicAEProvider.getAE();
     assertNotNull(analysisEngine);
   }

Modified: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java?rev=1244474&r1=1244473&r2=1244474&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java (original)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java Wed Feb 15 13:17:57 2012
@@ -34,7 +34,7 @@ public class OverridingParamsAEProviderT
   @Test
   public void testNullMapInitialization() throws Exception {
     try {
-      AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", null);
+      AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", null);
       aeProvider.getAE();
       fail("should fail due to null Map passed");
     } catch (ResourceInitializationException e) {
@@ -44,7 +44,7 @@ public class OverridingParamsAEProviderT
 
   @Test
   public void testEmptyMapInitialization() throws Exception {
-    AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", new HashMap<String, Object>());
+    AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", new HashMap<String, Object>());
     AnalysisEngine analysisEngine = aeProvider.getAE();
     assertNotNull(analysisEngine);
   }

Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,77 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.TokenAnnotation;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+/**
+ * Sample entity annotator for tests: for every token whose part-of-speech tag is {@code np} or
+ * {@code nps} it adds an entity annotation covering the same span as the token.
+ */
+public class SampleEntityAnnotator extends JCasAnnotator_ImplBase {
+
+  private static final String NP = "np";
+  private static final String NPS = "nps";
+  // has to be the type name declared in TestEntityAnnotatorAE.xml, or the type lookup yields null
+  private static final String TYPE_NAME = "org.apache.lucene.uima.ts.EntityAnnotation";
+  private static final String ENTITY_FEATURE = "entity";
+  // the descriptor declares two distinct features, "name" and "entity"
+  private static final String NAME_FEATURE = "name";
+
+  /**
+   * Creates an entity annotation for each token tagged {@code np} or {@code nps}. The
+   * {@code entity} feature receives the token's covered text; the {@code name} feature receives
+   * {@code "ORGANIZATION"} when that text is {@code "Apache"} and {@code "OTHER"} otherwise.
+   *
+   * @param jcas the CAS to read token annotations from and to add entity annotations to
+   * @throws AnalysisEngineProcessException declared by the overridden method; not thrown explicitly here
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
+    Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
+    Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);
+
+    // NOTE(review): tokens are read through the org.apache.uima.TokenAnnotation cover class, while
+    // TestEntityAnnotatorAE.xml lists org.apache.lucene.uima.ts.TokenAnnotation as input - confirm
+    // which token type is intended.
+    for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
+      String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
+
+      if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
+        String coveredText = annotation.getCoveredText();
+        AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
+        entityAnnotation.setStringValue(entityFeature, coveredText);
+
+        // "OTHER" is only a placeholder; in practice "PERSON", "COUNTRY", "E-MAIL", etc.
+        String name = "Apache".equals(coveredText) ? "ORGANIZATION" : "OTHER";
+        entityAnnotation.setStringValue(nameFeature, name);
+
+        jcas.addFsToIndexes(entityAnnotation);
+      }
+    }
+  }
+
+}

Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,69 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+/**
+ * Sample part-of-speech tagger for tests: stores a coarse tag in the {@code pos} feature of
+ * every {@code org.apache.lucene.uima.ts.TokenAnnotation} in the annotation index.
+ */
+public class SamplePoSTagger extends JCasAnnotator_ImplBase {
+
+  private static final String TYPE_NAME = "org.apache.lucene.uima.ts.TokenAnnotation";
+  private static final String FEATURE_NAME = "pos";
+  private static final String NUM = "NUM";
+  private static final String WORD = "WORD";
+
+  /**
+   * Sets the {@code pos} feature of each token annotation to {@code NUM} or {@code WORD},
+   * depending on the token's covered text.
+   *
+   * @param jcas the CAS whose token annotations are tagged in place
+   * @throws AnalysisEngineProcessException declared by the overridden method; not thrown explicitly here
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Type tokenType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
+    Feature posFeature = tokenType.getFeatureByBaseName(FEATURE_NAME);
+
+    for (Annotation token : jcas.getAnnotationIndex(tokenType)) {
+      token.setStringValue(posFeature, extractPoS(token.getCoveredText()));
+    }
+  }
+
+  /**
+   * Returns {@code NUM} when {@link Double#valueOf(String)} accepts the text and {@code WORD}
+   * when it throws.
+   */
+  private String extractPoS(String text) {
+    String tag = NUM;
+    try {
+      Double.valueOf(text);
+    } catch (Exception ignored) {
+      // whatever Double.valueOf rejects is tagged as a plain word
+      tag = WORD;
+    }
+    return tag;
+  }
+}

Added: lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1244474&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java (added)
+++ lucene/dev/trunk/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java Wed Feb 15 13:17:57 2012
@@ -0,0 +1,74 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Sample whitespace tokenizer for tests: adds one sentence annotation per newline-separated
+ * chunk of the document text and one token annotation per space-separated chunk.
+ */
+public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase {
+
+  private static final String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
+  private static final String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
+  private static final String NEWLINE = "\n";
+  private static final String WHITESPACE = " ";
+
+  /**
+   * Adds sentence and token annotations whose begin and end offsets match the position of
+   * each chunk inside the document text. Does nothing when the view has no document text.
+   *
+   * @param jCas the CAS holding the document text; the new annotations are added to its indexes
+   * @throws AnalysisEngineProcessException declared by the overridden method; not thrown explicitly here
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    String documentText = jCas.getDocumentText();
+    if (documentText == null) {
+      return;
+    }
+    Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
+    Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
+
+    annotateChunks(jCas, sentenceType, documentText, NEWLINE);
+    annotateChunks(jCas, tokenType, documentText, WHITESPACE);
+  }
+
+  /**
+   * Splits {@code text} on {@code delimiter} and indexes one annotation of {@code type} per chunk.
+   * The delimiter is handed to {@link String#split(String)} as a regular expression, so it must
+   * be a literal without regex metacharacters. Empty chunks (e.g. between two consecutive
+   * delimiters) produce zero-length annotations.
+   */
+  private void annotateChunks(JCas jCas, Type type, String text, String delimiter) {
+    int begin = 0;
+    for (String chunk : text.split(delimiter)) {
+      int end = begin + chunk.length();
+      AnnotationFS annotation = jCas.getCas().createAnnotation(type, begin, end);
+      jCas.addFsToIndexes(annotation);
+      // step over the delimiter so the next chunk starts at its real offset
+      begin = end + delimiter.length();
+    }
+  }
+
+}