You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2012/11/27 18:29:03 UTC

svn commit: r1414276 - in /uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker: AllTests.java seed/ seed/DefaultSeederTest.java

Author: pkluegl
Date: Tue Nov 27 17:28:59 2012
New Revision: 1414276

URL: http://svn.apache.org/viewvc?rev=1414276&view=rev
Log:
UIMA-2508
- added JUnit test for DefaultSeeder (currently fails because of the HTML fragment)

Added:
    uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/
    uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java
Modified:
    uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java

Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java?rev=1414276&r1=1414275&r2=1414276&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java Tue Nov 27 17:28:59 2012
@@ -3,6 +3,7 @@ package org.apache.uima.textmarker;
 import org.apache.uima.textmarker.condition.CurrentCountTest;
 import org.apache.uima.textmarker.condition.PartOfTest;
 import org.apache.uima.textmarker.condition.PositionTest;
+import org.apache.uima.textmarker.seed.DefaultSeederTest;
 import org.junit.runner.RunWith;
 import org.junit.runners.Suite;
 import org.junit.runners.Suite.SuiteClasses;
@@ -13,7 +14,7 @@ import org.junit.runners.Suite.SuiteClas
     QuantifierTest1.class, QuantifierTest2.class, RuleInferenceTest.class,
     RuleInferenceTest2.class, RuleInferenceTest3.class, AllActionsTest.class,
     AllConditionsTest.class, CurrentCountTest.class,
-    PartOfTest.class, PositionTest.class })
+    PartOfTest.class, PositionTest.class, DefaultSeederTest.class })
 public class AllTests {
 
 }

Added: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java?rev=1414276&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java Tue Nov 27 17:28:59 2012
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.seed;
+
+import static org.junit.Assert.assertEquals;
+
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.textmarker.TextMarkerTestUtils;
+import org.apache.uima.textmarker.engine.TextMarkerEngine;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Test;
+
+/**
+ * Checks that {@link DefaultSeeder} splits a document mixing words, punctuation, an HTML entity
+ * and HTML markup into the expected sequence of seed annotations.
+ */
+public class DefaultSeederTest {
+
+  /** Expected short type names of the seed annotations, in annotation index order. */
+  private static final String[] EXPECTED_TYPES = {
+      // "Different kinds of tokens like ApacheUIMA"
+      "CW", "SPACE", "SW", "SPACE", "SW", "SPACE", "SW", "SPACE", "SW", "SPACE", "CW", "CAP",
+      // " % &amp; <nomarkup"
+      "SPACE", "SPECIAL", "SPACE", "AMP", "SPACE", "SPECIAL", "SW",
+      // " !?.;:,"
+      // NOTE(review): the SPACE before "!" was missing from the original expectations although
+      // the text has a blank there; confirm against the lexer.
+      "SPACE", "EXCLAMATION", "QUESTION", "PERIOD", "SEMICOLON", "COLON", "COMMA",
+      // " and also <FONT ...>"
+      "SPACE", "SW", "SPACE", "SW", "SPACE", "MARKUP",
+      // "html <b>markup</b></FONT><br/>" and the trailing line break
+      "SW", "SPACE", "MARKUP", "SW", "MARKUP", "MARKUP", "MARKUP", "BREAK" };
+
+  /**
+   * Seeds a small mixed document and compares name, number and order of the created annotations
+   * with {@link #EXPECTED_TYPES}.
+   *
+   * @throws Exception
+   *           if loading the descriptor, creating the engine or seeding fails
+   */
+  @Test
+  public void test() throws Exception {
+    // Use BasicEngine.xml if it is on the classpath, otherwise fall back to TestEngine.xml.
+    URL url = TextMarkerEngine.class.getClassLoader().getResource("BasicEngine.xml");
+    if (url == null) {
+      url = TextMarkerTestUtils.class.getClassLoader().getResource(
+              "org/apache/uima/textmarker/TestEngine.xml");
+    }
+    XMLInputSource in = new XMLInputSource(url);
+    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+    // The engine is only used to create the CAS that the seeder fills.
+    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
+    CAS cas = ae.newCAS();
+
+    // Document under test; EXPECTED_TYPES mirrors it token by token.
+    String text = "Different kinds of tokens like ApacheUIMA % &amp; <nomarkup !?.;:,"
+            + " and also <FONT CLASS=\"western\" ALIGN=JUSTIFY "
+            + "STYLE=\"margin-bottom: 0cm\">html <b>markup</b></FONT>" + "<br/>\n";
+    cas.setDocumentText(text);
+
+    DefaultSeeder seeder = new DefaultSeeder();
+    Type type = seeder.seed(text, cas);
+    assertEquals("org.apache.uima.textmarker.type.TokenSeed", type.getName());
+
+    // The size check is derived from the expectation list: the former hard-coded 26 contradicted
+    // the 39 per-annotation assertions that followed, so the test could never pass.
+    AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex(type);
+    assertEquals(EXPECTED_TYPES.length, annotationIndex.size());
+
+    // Walk the index in order; the message identifies the position of a mismatch.
+    FSIterator<AnnotationFS> iterator = annotationIndex.iterator();
+    for (int i = 0; i < EXPECTED_TYPES.length; i++) {
+      String actual = iterator.next().getType().getShortName();
+      assertEquals("seed annotation #" + i, EXPECTED_TYPES[i], actual);
+    }
+  }
+}