You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/10/28 00:13:05 UTC

svn commit: r1634734 - in /opennlp/trunk/opennlp-uima/src: main/java/opennlp/uima/util/ test/java/opennlp/uima/util/ test/resources/cas/

Author: joern
Date: Mon Oct 27 23:13:05 2014
New Revision: 1634734

URL: http://svn.apache.org/r1634734
Log:
OPENNLP-676 Fixed a bug in the AnnotationComboIterator. The iterator was crashing or skipping valid tokens if the CAS contained tokens which are outside of the upper annotation bounds. Added a test case to reproduce the observed bug.

Added:
    opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/
    opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
    opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
    opennlp/trunk/opennlp-uima/src/test/resources/cas/
    opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi   (with props)
Modified:
    opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java

Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java?rev=1634734&r1=1634733&r2=1634734&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java Mon Oct 27 23:13:05 2014
@@ -91,7 +91,7 @@ public class AnnotationComboIterator imp
         while (lowerBegin < AnnotationComboIterator.this.upperBegin) {
           AnnotationComboIterator.this.lowerIt.moveToNext();
           if (AnnotationComboIterator.this.lowerIt.isValid()) {
-            lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.next();
+            lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.get();
             lowerBegin = lowerFS.getBegin();
           } else {
             return false;

Added: opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java?rev=1634734&view=auto
==============================================================================
--- opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java (added)
+++ opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java Mon Oct 27 23:13:05 2014
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class AnnotationComboIteratorTest {
+
+  /**
+   * Ensures that the bug observed in OPENNLP-676 is fixed. The described
+   * bug occurs if there are tokens which are outside of the sentence bounds.
+   * In that case an uncommon code path in the iterator is used to skip the
+   * out-of-sentence tokens until it again finds tokens which are inside a sentence.
+   * <p>
+   * The iterator was either crashing with a NoSuchElementException or it just left
+   * out the first token in the next sentence.
+   * 
+   * @throws IOException if deserializing the XMI test CAS fails
+   */
+  @Test
+  public void OPENNLP_676() throws IOException {
+    TypeSystemDescription ts = CasUtil
+        .createTypeSystemDescription(AnnotationComboIteratorTest.class
+            .getResourceAsStream("/test-descriptors/TypeSystem.xml"));
+
+    CAS cas = CasUtil.createEmptyCAS(ts);
+
+    CasUtil.deserializeXmiCAS(cas, AnnotationComboIteratorTest.class
+        .getResourceAsStream("/cas/OPENNLP-676.xmi"));
+
+    AnnotationComboIterator comboIterator = new AnnotationComboIterator(cas,
+        cas.getTypeSystem().getType("opennlp.uima.Sentence"), cas
+            .getTypeSystem().getType("opennlp.uima.Token"));
+    // One list of covered token texts per pair yielded by the combo iterator, in iteration order.
+    List<List<String>> tokensBySentence = new ArrayList<>();
+
+    for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
+
+      final List<String> tokens = new ArrayList<>();
+
+      for (AnnotationFS tokenAnnotation : annotationIteratorPair
+          .getSubIterator()) {
+        tokens.add(tokenAnnotation.getCoveredText());
+      }
+
+      tokensBySentence.add(tokens);
+    }
+    // Only the first two collected lists are checked against the expected token texts.
+    Assert.assertEquals(Arrays.asList("A"), tokensBySentence.get(0));
+    Assert.assertEquals(Arrays.asList("H", "I"), tokensBySentence.get(1));
+  }
+
+}

Added: opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java?rev=1634734&view=auto
==============================================================================
--- opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java (added)
+++ opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java Mon Oct 27 23:13:05 2014
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.uima.ResourceSpecifierFactory;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.FsIndexDescription;
+import org.apache.uima.resource.metadata.TypePriorities;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.resource.metadata.impl.FsIndexDescription_impl;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+import org.xml.sax.SAXException;
+
+public class CasUtil {
+  /** Parses a type system descriptor from the stream; returns null if the XML is invalid. */
+  public static TypeSystemDescription createTypeSystemDescription(InputStream in) {
+
+    // Note: the type system location is not set correctly (only new File("")
+    // is passed to the XMLInputSource below), so resolving a type system
+    // that is referenced from this descriptor will fail.
+
+    XMLInputSource xmlTypeSystemSource = new XMLInputSource(in, new File(""));
+
+    XMLParser xmlParser = UIMAFramework.getXMLParser();
+
+    TypeSystemDescription typeSystemDesciptor;
+
+    try {
+      typeSystemDesciptor = (TypeSystemDescription) xmlParser
+          .parse(xmlTypeSystemSource);
+
+      typeSystemDesciptor.resolveImports();
+    } catch (InvalidXMLException e) {
+      e.printStackTrace();
+      typeSystemDesciptor = null;
+    }
+    // May be null here: an InvalidXMLException is only printed, not rethrown.
+    return typeSystemDesciptor;
+  }
+  /** Creates a CAS for the given type system with a single sorted index; returns null if creation fails. */
+  public static CAS createEmptyCAS(TypeSystemDescription typeSystem) {
+    ResourceSpecifierFactory resourceSpecifierFactory = UIMAFramework
+        .getResourceSpecifierFactory();
+    TypePriorities typePriorities = resourceSpecifierFactory
+        .createTypePriorities();
+    // Describe one sorted index, labelled "TOPIndex", over the type uima.cas.TOP.
+    FsIndexDescription indexDesciptor = new FsIndexDescription_impl();
+    indexDesciptor.setLabel("TOPIndex");
+    indexDesciptor.setTypeName("uima.cas.TOP");
+    indexDesciptor.setKind(FsIndexDescription.KIND_SORTED);
+
+    CAS cas;
+    try {
+      cas = CasCreationUtils.createCas(typeSystem, typePriorities,
+          new FsIndexDescription[] { indexDesciptor });
+    } catch (ResourceInitializationException e) {
+      e.printStackTrace();
+      cas = null;
+    }
+    // May be null here: a ResourceInitializationException is only printed, not rethrown.
+    return cas;
+  }
+  /** Parses XMI from xmiIn into cas using a non-validating SAX parser; the stream is not closed here. */
+  public static void deserializeXmiCAS(CAS cas, InputStream xmiIn) throws IOException {
+
+    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+    saxParserFactory.setValidating(false);
+
+    SAXParser saxParser;
+
+    try {
+      saxParser = saxParserFactory.newSAXParser();
+    } catch (ParserConfigurationException e) {
+      throw new IllegalStateException(
+          "SAXParser should be configured correctly!", e);
+    } catch (SAXException e) {
+      throw new IllegalStateException("SAX error while creating parser!", e);
+    }
+
+    XmiCasDeserializer dezerializer = new XmiCasDeserializer(
+        cas.getTypeSystem());
+    // A SAXException raised while parsing is rethrown to the caller as an IOException.
+    try {
+      saxParser.parse(xmiIn, dezerializer.getXmiCasHandler(cas));
+    } catch (SAXException e) {
+      throw new IOException("Invalid XMI input!", e);
+    }
+  }
+}

Added: opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi?rev=1634734&view=auto
==============================================================================
Binary file - no diff available.

Propchange: opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi
------------------------------------------------------------------------------
    svn:mime-type = application/xml