You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/10/28 00:13:05 UTC
svn commit: r1634734 - in /opennlp/trunk/opennlp-uima/src:
main/java/opennlp/uima/util/ test/java/opennlp/uima/util/ test/resources/cas/
Author: joern
Date: Mon Oct 27 23:13:05 2014
New Revision: 1634734
URL: http://svn.apache.org/r1634734
Log:
OPENNLP-676 Fixed a bug in the AnnotationComboIterator. The iterator was crashing or skipping valid tokens if the CAS contained tokens which are outside of the upper annotation bounds. Added a test case to reproduce the observed bug.
Added:
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
opennlp/trunk/opennlp-uima/src/test/resources/cas/
opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi (with props)
Modified:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java?rev=1634734&r1=1634733&r2=1634734&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java Mon Oct 27 23:13:05 2014
@@ -91,7 +91,7 @@ public class AnnotationComboIterator imp
while (lowerBegin < AnnotationComboIterator.this.upperBegin) {
AnnotationComboIterator.this.lowerIt.moveToNext();
if (AnnotationComboIterator.this.lowerIt.isValid()) {
- lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.next();
+ lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.get();
lowerBegin = lowerFS.getBegin();
} else {
return false;
Added: opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java?rev=1634734&view=auto
==============================================================================
--- opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java (added)
+++ opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java Mon Oct 27 23:13:05 2014
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Regression tests for {@link AnnotationComboIterator}, driven by a CAS that is
+ * deserialized from an XMI test resource.
+ */
+public class AnnotationComboIteratorTest {
+
+ /**
+ * Ensures that the bug observed in OPENNLP-676 is fixed. The described
+ * bug occurs if there are tokens which are outside of the sentence bounds.
+ * In that case an uncommon code path in the iterator is used to skip the
+ * out-of-sentence tokens until it again finds tokens which are inside a sentence.
+ * <p>
+ * The iterator was either crashing with a NoSuchElementException or it just left
+ * out the first token in the next sentence.
+ *
+ * @throws IOException if the XMI test resource cannot be read or parsed
+ */
+ @Test
+ public void OPENNLP_676() throws IOException {
+ // Build an empty CAS from the test type system, then fill it from the XMI fixture.
+ TypeSystemDescription ts = CasUtil
+ .createTypeSystemDescription(AnnotationComboIteratorTest.class
+ .getResourceAsStream("/test-descriptors/TypeSystem.xml"));
+
+ CAS cas = CasUtil.createEmptyCAS(ts);
+
+ CasUtil.deserializeXmiCAS(cas, AnnotationComboIteratorTest.class
+ .getResourceAsStream("/cas/OPENNLP-676.xmi"));
+
+ // Sentence is the upper (outer) annotation type, Token the lower (inner) one.
+ AnnotationComboIterator comboIterator = new AnnotationComboIterator(cas,
+ cas.getTypeSystem().getType("opennlp.uima.Sentence"), cas
+ .getTypeSystem().getType("opennlp.uima.Token"));
+
+ // Covered text of the tokens, grouped per element returned by the combo iterator.
+ List<List<String>> tokensBySentence = new ArrayList<>();
+
+ for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
+
+ final List<String> tokens = new ArrayList<>();
+
+ for (AnnotationFS tokenAnnotation : annotationIteratorPair
+ .getSubIterator()) {
+ tokens.add(tokenAnnotation.getCoveredText());
+ }
+
+ tokensBySentence.add(tokens);
+ }
+
+ // Expected grouping for the first two sentences of the fixture; the second
+ // assertion covers the "first token of the next sentence was skipped" symptom.
+ Assert.assertEquals(Arrays.asList("A"), tokensBySentence.get(0));
+ Assert.assertEquals(Arrays.asList("H", "I"), tokensBySentence.get(1));
+ }
+
+}
Added: opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java?rev=1634734&view=auto
==============================================================================
--- opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java (added)
+++ opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java Mon Oct 27 23:13:05 2014
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.uima.ResourceSpecifierFactory;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.FsIndexDescription;
+import org.apache.uima.resource.metadata.TypePriorities;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.resource.metadata.impl.FsIndexDescription_impl;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+import org.xml.sax.SAXException;
+
+/**
+ * Test helpers for creating a UIMA {@link CAS} and populating it from an XMI
+ * stream.
+ * <p>
+ * Setup failures are reported by throwing an exception which carries the
+ * original cause, instead of printing a stack trace and returning
+ * <code>null</code>, so that a broken fixture fails at the point of the error.
+ */
+public class CasUtil {
+
+  /**
+   * Parses a type system descriptor from the given stream and resolves its
+   * imports.
+   *
+   * @param in the stream to read the type system descriptor XML from, must
+   *     not be <code>null</code>
+   *
+   * @return the parsed type system description, never <code>null</code>
+   *
+   * @throws IllegalArgumentException if <code>in</code> is <code>null</code>,
+   *     e.g. because a class path resource could not be found
+   * @throws IllegalStateException if parsing the descriptor or resolving its
+   *     imports fails with an {@link InvalidXMLException}
+   */
+  public static TypeSystemDescription createTypeSystemDescription(InputStream in) {
+
+    // getResourceAsStream returns null for a missing resource, fail early with
+    // a clear message instead of an obscure parser error.
+    if (in == null) {
+      throw new IllegalArgumentException(
+          "Type system descriptor input stream must not be null!");
+    }
+
+    // Note:
+    // Type System location is not set correctly,
+    // resolving a referenced type system will fail
+
+    XMLInputSource xmlTypeSystemSource = new XMLInputSource(in, new File(""));
+
+    XMLParser xmlParser = UIMAFramework.getXMLParser();
+
+    try {
+      TypeSystemDescription typeSystemDescriptor =
+          (TypeSystemDescription) xmlParser.parse(xmlTypeSystemSource);
+
+      typeSystemDescriptor.resolveImports();
+
+      return typeSystemDescriptor;
+    } catch (InvalidXMLException e) {
+      throw new IllegalStateException(
+          "Type system descriptor could not be parsed!", e);
+    }
+  }
+
+  /**
+   * Creates a new CAS for the given type system. The CAS is created with
+   * empty type priorities and a single sorted index labeled
+   * <code>TOPIndex</code> over the type <code>uima.cas.TOP</code>.
+   *
+   * @param typeSystem the type system the CAS is created for
+   *
+   * @return the newly created CAS, never <code>null</code>
+   *
+   * @throws IllegalStateException if the CAS creation fails with a
+   *     {@link ResourceInitializationException}
+   */
+  public static CAS createEmptyCAS(TypeSystemDescription typeSystem) {
+    ResourceSpecifierFactory resourceSpecifierFactory = UIMAFramework
+        .getResourceSpecifierFactory();
+    TypePriorities typePriorities = resourceSpecifierFactory
+        .createTypePriorities();
+
+    FsIndexDescription indexDescriptor = new FsIndexDescription_impl();
+    indexDescriptor.setLabel("TOPIndex");
+    indexDescriptor.setTypeName("uima.cas.TOP");
+    indexDescriptor.setKind(FsIndexDescription.KIND_SORTED);
+
+    try {
+      return CasCreationUtils.createCas(typeSystem, typePriorities,
+          new FsIndexDescription[] { indexDescriptor });
+    } catch (ResourceInitializationException e) {
+      throw new IllegalStateException("CAS could not be created!", e);
+    }
+  }
+
+  /**
+   * Fills the given CAS with the content of an XMI stream, using a
+   * non-validating SAX parser and the XMI handler of an
+   * {@link XmiCasDeserializer} created for the type system of the CAS.
+   *
+   * @param cas the CAS to deserialize into
+   * @param xmiIn the stream to read the XMI document from
+   *
+   * @throws IOException if reading fails, or if the parser reports a
+   *     {@link SAXException} for the input
+   * @throws IllegalStateException if the SAX parser cannot be created
+   */
+  public static void deserializeXmiCAS(CAS cas, InputStream xmiIn) throws IOException {
+
+    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+    saxParserFactory.setValidating(false);
+
+    SAXParser saxParser;
+
+    try {
+      saxParser = saxParserFactory.newSAXParser();
+    } catch (ParserConfigurationException e) {
+      throw new IllegalStateException(
+          "SAXParser should be configured correctly!", e);
+    } catch (SAXException e) {
+      throw new IllegalStateException("SAX error while creating parser!", e);
+    }
+
+    XmiCasDeserializer deserializer = new XmiCasDeserializer(
+        cas.getTypeSystem());
+
+    try {
+      saxParser.parse(xmiIn, deserializer.getXmiCasHandler(cas));
+    } catch (SAXException e) {
+      // Report malformed XMI through the declared IOException, keeping the cause.
+      throw new IOException("Invalid XMI input!", e);
+    }
+  }
+}
Added: opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi?rev=1634734&view=auto
==============================================================================
Binary file - no diff available.
Propchange: opennlp/trunk/opennlp-uima/src/test/resources/cas/OPENNLP-676.xmi
------------------------------------------------------------------------------
svn:mime-type = application/xml