You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2012/11/27 18:29:03 UTC
svn commit: r1414276 - in
/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker:
AllTests.java seed/ seed/DefaultSeederTest.java
Author: pkluegl
Date: Tue Nov 27 17:28:59 2012
New Revision: 1414276
URL: http://svn.apache.org/viewvc?rev=1414276&view=rev
Log:
UIMA-2508
- added junit test for DefaultSeeder (fails right now because of html fragment)
Added:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java
Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java?rev=1414276&r1=1414275&r2=1414276&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/AllTests.java Tue Nov 27 17:28:59 2012
@@ -3,6 +3,7 @@ package org.apache.uima.textmarker;
import org.apache.uima.textmarker.condition.CurrentCountTest;
import org.apache.uima.textmarker.condition.PartOfTest;
import org.apache.uima.textmarker.condition.PositionTest;
+import org.apache.uima.textmarker.seed.DefaultSeederTest;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;
@@ -13,7 +14,7 @@ import org.junit.runners.Suite.SuiteClas
QuantifierTest1.class, QuantifierTest2.class, RuleInferenceTest.class,
RuleInferenceTest2.class, RuleInferenceTest3.class, AllActionsTest.class,
AllConditionsTest.class, CurrentCountTest.class,
- PartOfTest.class, PositionTest.class })
+ PartOfTest.class, PositionTest.class, DefaultSeederTest.class })
public class AllTests {
}
Added: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java?rev=1414276&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/test/java/org/apache/uima/textmarker/seed/DefaultSeederTest.java Tue Nov 27 17:28:59 2012
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.seed;
+
+import static org.junit.Assert.assertEquals;
+
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.textmarker.TextMarkerTestUtils;
+import org.apache.uima.textmarker.engine.TextMarkerEngine;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Test;
+
+public class DefaultSeederTest { // JUnit test that runs DefaultSeeder.seed on a mixed plain-text/HTML sample and checks the resulting annotation types
+ @Test
+ public void test() throws Exception { // single test case: build a CAS, seed it, verify the type name, index size and type sequence
+ URL url = TextMarkerEngine.class.getClassLoader().getResource("BasicEngine.xml"); // first choice of engine descriptor
+ if (url == null) { // descriptor not found via TextMarkerEngine's class loader: fall back to the test descriptor
+ url = TextMarkerTestUtils.class.getClassLoader().getResource(
+ "org/apache/uima/textmarker/TestEngine.xml");
+ }
+ XMLInputSource in = new XMLInputSource(url);
+ ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+ AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier); // the engine is used below only to create a CAS
+ CAS cas = ae.newCAS();
+
+ String text = "Different kinds of tokens like ApacheUIMA % & <nomarkup !?.;:," // sample input: words, special characters, punctuation ...
+ + " and also <FONT CLASS=\"western\" ALIGN=JUSTIFY " // ... an HTML tag with attributes ...
+ + "STYLE=\"margin-bottom: 0cm\">html <b>markup</b></FONT>" + "<br/>\n"; // ... nested tags and a trailing line break
+ cas.setDocumentText(text);
+
+ DefaultSeeder seeder = new DefaultSeeder();
+ Type type = seeder.seed(text, cas); // the returned Type is used below to look up the annotation index
+ assertEquals("org.apache.uima.textmarker.type.TokenSeed", type.getName());
+ AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex(type);
+ assertEquals(26, annotationIndex.size()); // NOTE(review): 26 is asserted here, yet 39 iterator.next() calls follow - confirm the intended count
+ FSIterator<AnnotationFS> iterator = annotationIndex.iterator();
+ assertEquals("CW", iterator.next().getType().getShortName()); // from here on: expected short type names, one per annotation, in iterator order
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("CW", iterator.next().getType().getShortName()); // presumably "Apache" of "ApacheUIMA" - verify against the seeder
+ assertEquals("CAP", iterator.next().getType().getShortName()); // presumably "UIMA" - verify against the seeder
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SPECIAL", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("AMP", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SPECIAL", iterator.next().getType().getShortName()); // presumably the lone "<" before "nomarkup" - verify against the seeder
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("EXCLAMATION", iterator.next().getType().getShortName()); // NOTE(review): the text has a space between "nomarkup" and "!", but no SPACE is expected before this - confirm
+ assertEquals("QUESTION", iterator.next().getType().getShortName());
+ assertEquals("PERIOD", iterator.next().getType().getShortName());
+ assertEquals("SEMICOLON", iterator.next().getType().getShortName());
+ assertEquals("COLON", iterator.next().getType().getShortName());
+ assertEquals("COMMA", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("MARKUP", iterator.next().getType().getShortName()); // presumably the opening <FONT ...> tag - verify against the seeder
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("SPACE", iterator.next().getType().getShortName());
+ assertEquals("MARKUP", iterator.next().getType().getShortName());
+ assertEquals("SW", iterator.next().getType().getShortName());
+ assertEquals("MARKUP", iterator.next().getType().getShortName());
+ assertEquals("MARKUP", iterator.next().getType().getShortName());
+ assertEquals("MARKUP", iterator.next().getType().getShortName());
+ assertEquals("BREAK", iterator.next().getType().getShortName()); // last expected annotation; presumably the trailing "\n" - verify against the seeder
+ }
+}